In [None]:
"""
@authors: faurand, chardes, ehagensieker
"""
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
import datetime
import tqdm
import pprint

# in a notebook, load the tensorboard extension, not needed for scripts
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
# Load the MNIST train and test splits. With as_supervised=True every element
# is an (image, label) tuple, which is what the mapping lambdas below unpack.
train_ds, test_ds = tfds.load(
    'mnist',
    split=['train', 'test'],
    as_supervised=True,
)

In [None]:
#Warning: this version simplifies the procedure by extracting only the targets!
# ds_train, ds_test = tfds.load('mnist', split=['train', 'test'], as_supervised=True)
def cumsum_dataset(ds, seq_len):
  """Build alternating-sign cumulative sums over windows of the labels.

  Only the targets of `ds` are used; the images are discarded. The labels are
  cut into windows of `seq_len` elements, the sign of every second label in a
  window is flipped (the first label keeps a positive sign), and a running
  sum is taken inside each window.

  Args:
    ds: tf.data.Dataset of (x, t) pairs; only `t` is read.
    seq_len: number of consecutive labels per window.

  Returns:
    A tf.data.Dataset with one int32 tensor of running sums per window.
  """

  def _apply_sign(sign, label):
    # scan contract: return (next_state, output). The state is the sign for
    # the current label; it is flipped for the next one.
    return sign * -1, label * sign

  def _accumulate(total, signed_label):
    # Both the new state and the emitted element are the sum so far.
    running = total + signed_label
    return running, running

  # Keep only the targets, as int32.
  labels = ds.map(lambda x, t: tf.cast(t, dtype=tf.dtypes.int32))
  # window() yields a dataset of datasets: every element is itself a small
  # tf.data.Dataset holding seq_len labels.
  windows = labels.window(seq_len)
  # Alternate the signs inside every window, starting with +1.
  signed = windows.map(lambda sub_ds: sub_ds.scan(initial_state=1, scan_func=_apply_sign))
  # Running sum inside every window, starting from 0.
  summed = signed.map(lambda sub_dataset: sub_dataset.scan(initial_state=0, scan_func=_accumulate))
  # Collapse each sub-dataset into a single tensor.
  return summed.map(lambda sub_dataset: sub_dataset.batch(seq_len).get_single_element())

# SEQ_LEN = 10
# for elem in ds_train.apply(lambda dataset: cumsum_dataset(dataset, SEQ_LEN)).take(10):
#   print(elem)


In [None]:
def new_target_fnc(ds, sequence_len):
  """Compute alternating-sign cumulative targets for consecutive sequences.

  The elements of `ds` are grouped into consecutive runs of `sequence_len`.
  Inside a run the first label starts the sum, the second is subtracted, the
  third is added, and so on. The sign depends on the position inside the
  run, not on the global index, so odd sequence lengths restart correctly.

  Args:
    ds: iterable of (image, label) pairs; only `elem[1]` is read and it must
      be convertible with int().
    sequence_len: positive number of elements per sequence.

  Returns:
    A flat list of Python ints, one running sum per element of `ds`.

  Raises:
    ValueError: if `sequence_len` is smaller than 1.
  """
  if sequence_len < 1:
    raise ValueError("sequence_len must be a positive integer")

  targets = []
  running = 0
  for i, elem in enumerate(ds):
    position = i % sequence_len
    digit = int(elem[1])
    if position == 0:
      # a new sequence starts: the sum is reset to the first digit
      running = digit
    elif position % 2 == 0:
      running += digit
    else:
      running -= digit
    targets.append(running)
  return targets
     

In [None]:
def prepare_data(data,batch_size = 32, seq_length = 4, augmentation = None): 
    '''
    Turn a zipped dataset into shuffled batches of fixed-length sequences.

    Every element of `data` is expected to be (img, target) where `img` can
    be indexed with [0] to obtain the image. This is the structure produced
    by zipping the (image, label) MNIST dataset with a dataset of new targets.
    NOTE(review): if a plain (image, label) dataset is passed instead, img[0]
    selects the first image row rather than the image - confirm callers.

    Pipeline: keep the image, flatten it, cast to float32 (targets to int32),
    scale the pixels with img/128 - 1 (uint8 input ends up in [-1, 1)), group
    `seq_length` consecutive samples into one sequence, cache, shuffle the
    sequences and batch them. No one-hot encoding is performed.

    Args:
        data: tf.data.Dataset of (img, target) elements as described above.
        batch_size: number of sequences per batch.
        seq_length: number of consecutive samples per sequence.
        augmentation: accepted for interface compatibility; currently unused.

    Returns:
        A batched and prefetched tf.data.Dataset of (images, targets).
    '''
    def _preprocess(img, target):
        # flatten the image and move both parts to the dtypes used in training
        image = tf.cast(tf.reshape(img[0], (-1,)), tf.float32)
        return (image/128.)-1., tf.cast(target, tf.int32)

    data = data.map(_preprocess)

    # drop_remainder=True keeps every sequence exactly seq_length long, so a
    # trailing shorter group can never reach a model that indexes fixed time
    # steps, and the time dimension stays statically known.
    data = data.batch(seq_length, drop_remainder=True)
    data = data.cache().shuffle(1028).batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return data 

# Number of consecutive samples per sequence; referenced by the commented-out
# cumsum_dataset calls further down in this cell.
seq_len = 4

# train_targets = tf.data.Dataset.from_tensor_slices(cumsum_dataset(train_ds, 4))
# test_targets = tf.data.Dataset.from_tensor_slices(cumsum_dataset(test_ds, 4))


#Zip up datasets and new targets
# train_ds = tf.data.Dataset.zip((train_ds, train_targets))
# test_ds = tf.data.Dataset.zip((test_ds, test_targets))


#Prepare the new datasets
# train = prepare_data(train_ds)
# test = prepare_data(test_ds)

# train_targets = cumsum_dataset(train, seq_len)
# test_targets = cumsum_dataset(test, seq_len)


In [None]:
# Preview one batch of targets. The pipeline is built here from a small slice
# of the training split, so the cell runs on a fresh kernel and does not rely
# on a global `train` variable (none of the live cells above defines one).
PREVIEW_BATCH_SIZE = 32
preview_source = train_ds.take(PREVIEW_BATCH_SIZE * seq_len)
preview_targets = tf.data.Dataset.from_tensor_slices(new_target_fnc(preview_source, seq_len))
train_preview = prepare_data(
    tf.data.Dataset.zip((preview_source, preview_targets)),
    batch_size=PREVIEW_BATCH_SIZE,
    seq_length=seq_len,
)
for e in train_preview.take(1):
  print(e[1])

tf.Tensor(
[[1 0 6 7]
 [2 7 9 6]
 [2 7 1 6]
 [1 5 5 4]
 [4 2 1 5]
 [1 0 4 9]
 [7 3 3 5]
 [0 6 9 8]
 [4 0 7 5]
 [1 7 9 0]
 [1 8 6 6]
 [8 1 8 7]
 [7 4 8 5]
 [7 7 3 3]
 [6 8 3 6]
 [7 0 2 1]
 [7 5 7 4]
 [0 6 6 9]
 [5 9 6 6]
 [9 9 1 7]
 [9 1 3 6]
 [1 5 8 3]
 [2 9 4 6]
 [5 0 2 5]
 [7 5 9 6]
 [3 4 6 3]
 [2 3 0 0]
 [0 6 2 4]
 [5 0 5 7]
 [6 0 0 9]
 [2 1 3 1]
 [6 8 7 7]], shape=(32, 4), dtype=int32)


In [None]:
from tensorflow.keras.layers import Dense
class BasicCNN(tf.keras.Model):
    """Applies one shared convolution recurrently over the steps of a sequence.

    The recurrent state starts as zeros with `filters` channels. At every
    time step the current frame is added to the state and the sum is passed
    through the shared convolution. The final state is average-pooled over
    its spatial dimensions.

    NOTE(review): assumes `sequence` is a float tensor shaped
    (batch, time, height, width, channels) with channels equal to 1 or to
    `filters`, so that the frame broadcasts onto the state - confirm.
    """

    def __init__(self, seq_len=4, filters=32):
        """
        Args:
            seq_len: number of time steps consumed by call (previously a
                hard-coded 4).
            filters: number of convolution filters, i.e. state channels.
        """
        super().__init__()
        self.seq_len = seq_len
        self.filters = filters
        self.layer=tf.keras.layers.Conv2D(filters, 3, padding = 'same', activation="relu")

        # The pooled tensor is the final state, which has no time axis, so the
        # pooling layer is applied directly rather than through TimeDistributed.
        self.output_basic = tf.keras.layers.GlobalAvgPool2D()

    def call(self, sequence, training=False):
        """Return the pooled final state, shaped (batch, filters)."""
        first_frame = sequence[:, 0]
        # Zero state with `filters` channels. Starting from zeros gives the
        # convolution the same input channel count at every step; feeding the
        # raw first frame would build its kernel for the wrong channel count.
        state = tf.zeros_like(first_frame[..., :1]) + tf.zeros([self.filters], dtype=first_frame.dtype)
        for t in range(self.seq_len):
            state = self.layer(state + sequence[:, t])

        return self.output_basic(state)

In [None]:
from tensorflow.keras.layers import Dense
class LSTMWrapper(tf.keras.layers.RNN):
    """Unrolls a custom LSTM cell over the time axis with an explicit loop.

    NOTE(review): `call` takes (seq_len, data, hs, cs), which does not match
    the layer(inputs, initial_state, constants) protocol of
    tf.keras.layers.RNN.__call__. Invoke it as `wrapper.call(...)`; confirm
    against the installed Keras version before routing it through __call__.
    """

    def __init__(self, AbstractRNNCelllayer, return_sequences = True):
        """
        Args:
            AbstractRNNCelllayer: cell instance whose call accepts
                (inputs, hidden_state, cell_state) and returns the new
                (hidden_state, cell_state), and which exposes `units`.
            return_sequences: if True, call returns the hidden state of every
                step; otherwise only the last one.
        """
        # Zero-argument super() is bound to this instance. The former
        # super(LSTMWrapper) created an unbound super object, so the parent
        # initialiser was never executed. RNN.__init__ stores the cell as
        # self.cell and the flag as self.return_sequences.
        super().__init__(AbstractRNNCelllayer, return_sequences=return_sequences)

    def call(self, seq_len, data, hs, cs):
        """Run the cell for `seq_len` steps.

        Args:
            seq_len: number of time steps to process (Python int).
            data: tensor indexed as data[:, t, :] for step t.
            hs: initial hidden state.
            cs: initial cell state.

        Returns:
            A list with one hidden state per step when return_sequences is
            True, otherwise the hidden state after the last step.
        """
        out = []
        hidden_state, cell_state = hs, cs
        for t in range(seq_len):
            hidden_state, cell_state = self.cell(data[:,t,:], hidden_state,cell_state)
            out.append(hidden_state)

        if not self.return_sequences:
            return hidden_state
        return out

    def zeros(self, batch_size):
        """Return a float32 zero state shaped (batch_size, cell.units)."""
        return tf.zeros((batch_size, self.cell.units), dtype=tf.float32)

In [None]:
from tensorflow.keras.layers import Dense
class AbstractRNNCelllayer(tf.keras.layers.AbstractRNNCell):
    """LSTM cell built from four dense gate layers.

    Both the hidden state and the cell state have `units` features. Unlike
    the standard Keras cell protocol, `call` receives the two states as
    separate arguments and returns only the two new states.
    """

    def __init__(self, units, **kwargs):
        """
        Args:
            units: size of the hidden state and of the cell state.
            **kwargs: forwarded to the Keras base class.
        """
        # Zero-argument super() is bound to this instance; the former
        # super(AbstractRNNCelllayer) never ran the parent initialiser.
        super().__init__(**kwargs)

        self.units = units

        # The forget bias starts at one so the cell initially keeps its memory.
        self.forget_layer = tf.keras.layers.Dense(units,
                                                  activation="sigmoid",
                                                  bias_initializer='ones')
        self.input_layer = tf.keras.layers.Dense(units,
                                                 activation="sigmoid")

        self.candidates_layer = tf.keras.layers.Dense(units,
                                                      activation="tanh")

        self.output_layer = tf.keras.layers.Dense(units,
                                                  activation="sigmoid")

    @property
    def state_size(self):
        """Shapes of the two states (hidden, cell), without the batch axis."""
        return [tf.TensorShape([self.units]),
                tf.TensorShape([self.units])]

    @property
    def output_size(self):
        """Shape of the output (the hidden state), without the batch axis."""
        return [tf.TensorShape([self.units])]

    def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
        """Return zero-filled [hidden_state, cell_state], each (batch, units).

        The batch size is taken from `batch_size` or, if that is None, from
        the first dimension of `inputs`.

        Raises:
            ValueError: if neither `inputs` nor `batch_size` is given.
        """
        if batch_size is None:
            if inputs is None:
                raise ValueError("either inputs or batch_size must be provided")
            batch_size = tf.shape(inputs)[0]
        if dtype is None:
            dtype = tf.float32
        return [tf.zeros([batch_size, self.units], dtype=dtype),
                tf.zeros([batch_size, self.units], dtype=dtype)]

    def call(self, inputs, hidden_state, cell_state):
        """Advance the cell by one time step.

        Args:
            inputs: input features of this step, shaped (batch, features).
            hidden_state: previous hidden state, shaped (batch, units).
            cell_state: previous cell state, shaped (batch, units).

        Returns:
            [new_hidden_state, new_cell_state]
        """
        # All gates see the step input concatenated with the previous hidden state.
        gate_input = tf.concat([inputs, hidden_state], axis = 1)

        # The forget gate scales the previous cell state (the memory).
        forget_gate = self.forget_layer(gate_input)
        kept_memory = tf.math.multiply(forget_gate, cell_state)

        input_gate = self.input_layer(gate_input)
        candidates = self.candidates_layer(gate_input)
        new_cell_state = tf.math.add(kept_memory, tf.math.multiply(input_gate, candidates))

        output_gate = self.output_layer(gate_input)
        # TensorFlow ops keep the computation in the graph and differentiable.
        new_hidden_state = tf.math.multiply(output_gate, tf.nn.tanh(new_cell_state))

        return [new_hidden_state, new_cell_state]

In [None]:
from tensorflow.keras.layers import Dense
class RNN_Model(tf.keras.Model):
    """Sequence regressor: dense read-in, custom LSTM, dense read-out per step.

    For inputs shaped (batch, seq_len, features) the model returns one
    prediction per time step, shaped (batch, seq_len), trained with mean
    squared error against the per-step targets.
    """

    def __init__(self, seq_len = 4, optimizer = None):
        """
        Args:
            seq_len: number of time steps that call unrolls.
            optimizer: Keras optimizer; a new Adam instance is created when
                None, so that models never share optimizer state through a
                default argument.
        """
        super(RNN_Model, self).__init__()

        self.seq_len = seq_len
        self.optimizer = optimizer if optimizer is not None else tf.keras.optimizers.Adam()
        self.loss_function = tf.keras.losses.MeanSquaredError()

        self.input_ly = tf.keras.layers.Dense(units=1, activation='sigmoid')
        self.LSTM =  LSTMWrapper(AbstractRNNCelllayer(2))
        # Linear read-out: the targets are integer sums, which a sigmoid
        # (range 0..1) cannot represent.
        self.output_ly = tf.keras.layers.Dense(units=1)

        # "acc" is the fraction of predictions that round to the exact target;
        # the name matches the "train_acc"/"test_acc" history keys.
        self.metrics_list = [tf.keras.metrics.Mean(name="loss"),
                             tf.keras.metrics.Mean(name="acc")]

    @property
    def metrics(self):
        """Metric objects, in the order [loss, acc]."""
        return self.metrics_list

    def reset_metrics(self):
        """Reset every metric, e.g. between training and evaluation."""
        for metric in self.metrics:
            metric.reset_state()

    def call(self, data, training=False):
        """Return per-step predictions shaped (batch, seq_len)."""
        x = self.input_ly(data)
        # Size the zero states from the actual batch; the last batch of an
        # epoch may be smaller than the nominal batch size.
        batch_size = tf.shape(data)[0]
        hidden_0 = self.LSTM.zeros(batch_size)
        cell_0 = self.LSTM.zeros(batch_size)
        # The wrapper defines call(seq_len, data, hs, cs); it is invoked
        # directly because that signature does not fit the RNN __call__ protocol.
        hidden_states = self.LSTM.call(self.seq_len, x, hidden_0, cell_0)
        # One (batch, 1) prediction per step, concatenated along the time axis.
        return tf.concat([self.output_ly(val) for val in hidden_states], axis=1)

    def _update_metrics(self, loss, target, output):
        """Feed the loss and the rounded-match accuracy into the metrics."""
        self.metrics_list[0].update_state(loss)
        hits = tf.equal(tf.round(output), tf.cast(target, output.dtype))
        self.metrics_list[1].update_state(tf.cast(hits, tf.float32))

    @tf.function
    def train_step(self, train_data):
        """Run one optimisation step on a batch and return the metric values."""
        x, target = train_data
        with tf.GradientTape() as tape:
            output = self(x, training = True)
            loss = self.loss_function(target, output)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        self._update_metrics(loss, target, output)

        return {m.name : m.result() for m in self.metrics}

    @tf.function
    def test(self, test_data):
        """Evaluate one batch without updating weights; return the metric values."""
        x, target = test_data
        output = self(x, training=False)
        loss = self.loss_function(target, output)

        self._update_metrics(loss, target, output)

        return {m.name : m.result() for m in self.metrics}

    def test_step(self, data):
        """Keras-style alias for test(), so both names evaluate the same way."""
        return self.test(data)


In [None]:

def training_loop(model, train_ds, val_ds, epochs, train_summary_writer, val_summary_writer): 
    '''
    Train the model on the training set and evaluate it on the validation set
    once per epoch.

    Args:
        model: object providing train_step(batch), an evaluation method named
            test(batch) or test_step(batch), a `metrics` list and
            reset_metrics(). Both step methods return {metric name: value}.
        train_ds: iterable of training batches.
        val_ds: iterable of validation batches.
        epochs: number of epochs to run.
        train_summary_writer: summary writer for the training metrics.
        val_summary_writer: summary writer for the validation metrics.

    Returns:
        dict mapping "train_loss", "train_acc", "test_loss" and "test_acc"
        (plus one key per additional metric) to a list of per-epoch values.
    '''
    hist = {"train_loss":[],"train_acc":[],"test_loss":[], "test_acc":[]}

    # Prefer the model's own test() method and fall back to Keras' test_step.
    evaluate = getattr(model, "test", None)
    if not callable(evaluate):
        evaluate = model.test_step

    def _finish_phase(prefix, metrics, writer, epoch):
        # One scalar per metric and epoch. Writing inside the batch loop put
        # many values on the same step.
        with writer.as_default():
            for metric in model.metrics:
                tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)

        # print the metrics and add to history element
        for key, value in metrics.items():
            # any accuracy-type metric (e.g. "binary_accuracy") goes to "acc"
            short = "acc" if "acc" in key else key
            hist.setdefault(f"{prefix}_{short}", []).append(value.numpy())
            print(f"{prefix}_{key}: {value.numpy()}")

        model.reset_metrics()

    #iterate over given amount of epochs
    for epoch in range(epochs): 
        print(f"Epoch {epoch}: ")

        #train on all batches of the training data
        metrics = {}
        for data in tqdm.tqdm(train_ds, position = 0, leave = True):
            metrics = model.train_step(data)
        _finish_phase("train", metrics, train_summary_writer, epoch)

        #evaluation on validation set
        metrics = {}
        for data in val_ds:
            metrics = evaluate(data)
        _finish_phase("test", metrics, val_summary_writer, epoch)

        print("\n")

    return hist

In [None]:

def create_summary_writers(config_name):
  '''
  Create the TensorBoard writers for one run.

  The log directories are placed under logs/submission/<config_name>/ and
  share one timestamp taken when this function is called.

  Returns:
    (train_summary_writer, val_summary_writer)
  '''
  current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

  log_paths = (
      f"logs/submission/{config_name}/{current_time}/train",
      f"logs/submission/{config_name}/{current_time}/val",
  )

  # one file writer per path, created in train -> val order
  train_writer, val_writer = [tf.summary.create_file_writer(path) for path in log_paths]
  return train_writer, val_writer


In [None]:
import matplotlib.pyplot as plt

def run(model,num_epochs=15,save=True,load=False,config_name = 'RUN'):
  '''
  Train and evaluate `model` on MNIST with alternating cumulative-sum targets
  and plot the learning curves.

  Args:
    model: model accepted by training_loop.
    num_epochs: number of training epochs.
    save: if True, the weights are written to saved_model_<config_name> after
      training.
    load: if True, weights are restored from saved_model_<config_name> before
      training.
    config_name: name used for the log directory and the checkpoint.
  '''

  ### Hyperparameters
  batch_size = 32
  # Use the model's sequence length when it exposes one, so that the data
  # and the model cannot disagree; 4 is the previous hard-coded value.
  seq_len = getattr(model, "seq_len", 4)

  train_summary_writer, val_summary_writer = create_summary_writers(config_name)

  train_ds, test_ds = tfds.load('mnist', split=['train', 'test'], as_supervised=True)

  def _build_pipeline(split):
    # Pair every sample with its cumulative-sum target, then batch it.
    targets = tf.data.Dataset.from_tensor_slices(new_target_fnc(split, seq_len))
    zipped = tf.data.Dataset.zip((split, targets))
    return prepare_data(zipped, batch_size=batch_size, seq_length=seq_len)

  train = _build_pipeline(train_ds)
  test = _build_pipeline(test_ds)

  if load:
    model.load_weights(f"saved_model_{config_name}")

  hist = training_loop(model,train,test,num_epochs,train_summary_writer, val_summary_writer)

  if save:
    model.save_weights(f"saved_model_{config_name}", save_format="tf")

  fig, ax = plt.subplots(1,2, figsize=(10,10))

  ax[0].plot(hist['train_acc'], label='train accuracy')
  ax[0].plot(hist['test_acc'], label='test accuracy')
  ax[0].set(title='Accuracy', xlabel='epoch', ylabel='accuracy')
  ax[0].legend()

  ax[1].plot(hist['train_loss'], label='train loss')
  ax[1].plot(hist['test_loss'], label='test loss')
  ax[1].set(title='Loss', xlabel='epoch', ylabel='loss')
  ax[1].legend();

In [None]:
myMod2 = RNN_Model()
# The banner names what is actually trained: RNN_Model wraps the custom LSTM
# cell and defaults to the Adam optimizer. It has no convolution, L1
# regularizer or dropout layers.
print("LSTM model on cumulative MNIST digit sums, optimizer=Adam")
run(myMod2)

Basic Convolutional Network, optimizer=Adam, L1 regularizer, dropout layers
Epoch 0: 


  0%|          | 0/469 [00:00<?, ?it/s]


AttributeError: ignored