In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import datetime
import tensorflow_datasets as tfds

In [2]:
mnist = tfds.load('mnist', split=['train', 'test'], as_supervised=True)

''' shape should be (batch_size, sequence_length, features) '''
def preprocess_data(mnist, batch_size, sequence_length):
  #convert data from uint8 to float32
  mnist = mnist.map(lambda img, target: (tf.cast(img, tf.float32), target))
  #sloppy input normalization, just bringing image values from range [0, 255] to [-1, 1]
  mnist = mnist.map(lambda img, target: ((img/128.)-1., target))
  mnist = mnist.shuffle(1000)

  #create tensor with length of data/representing indices
  stop = sequence_length
  sequence_vector = tf.range(sequence_length)
  alternating_target_numbers = []
  small_mnist = []
  start = 0
  

  while start < (int(len(mnist)/sequence_length)):
    for value in mnist:
      for i in range(start, stop):
          small_mnist.append(value)

      alternating_target_numbers.append(tf.where(tf.math.floormod(sequence_vector, 2) == 0, [elem[1] for elem in small_mnist], [- elem[1] for elem in small_mnist]))
      small_mnist = []
      start += sequence_length
      stop += sequence_length

  print("alternating_target_numbers", alternating_target_numbers)

  c_sum = tf.math.cumsum(alternating_target_numbers)
  c_sum = tf.data.Dataset.from_tensor_slices(c_sum)

  # put MNIST and new targets together
  mnist = tf.data.Dataset.zip((mnist, c_sum))
  mnist = mnist.map(lambda img, target: (img[0], target))

  #print("c_sum:", c_sum)
  #cache this progress in memory, as there is no need to redo it; it is deterministic after all
  mnist = mnist.cache()
  mnist = mnist.batch(batch_size)
  mnist = mnist.prefetch(20)
  #return preprocessed dataset
  return mnist


In [3]:
class LSTMCell(tf.keras.layers.AbstractRNNCell):

    def __init__(self, num_units, **kwargs):
        super().__init__(**kwargs)

        self.num_units = num_units
        self.hidden_state = num_units
        self.cell_state = num_units
        self.states = [self.hidden_state, self.cell_state]
        
        # first sigmoid layer: forget_gate
        self.layer_sigmoid1 = tf.keras.layers.Dense(num_units,
                                                     kernel_initializer= tf.keras.initializers.Orthogonal(gain=1.0, seed=None),
                                                     activation=tf.nn.sigmoid)
        # second sigmoid layer: input_gate
        self.layer_sigmoid2 = tf.keras.layers.Dense(num_units, kernel_initializer= tf.keras.initializers.Orthogonal(gain=1.0, seed=None), 
                                                       activation=tf.nn.sigmoid)    

        # tanh layer: input_gate (candidates)
        self.layer_tanh = tf.keras.layers.Dense(num_units, kernel_initializer= tf.keras.initializers.Orthogonal(gain=1.0, seed=None), 
                                                       activation=tf.nn.tanh)
        # third sigmoid layer: output_gate
        self.layer_sigmoid3 = tf.keras.layers.Dense(num_units, kernel_initializer= tf.keras.initializers.Orthogonal(gain=1.0, seed=None), 
                                                       activation=tf.nn.sigmoid)
        

        # layer normalization for trainability
        self.layer_norm_h = tf.keras.layers.LayerNormalization()
        self.layer_norm_c = tf.keras.layers.LayerNormalization()
    
    @property
    def state_size(self):
        return [tf.TensorShape(self.hidden_state), tf.TensorShape(self.cell_state)]

    @property
    def output_size(self):
        return tf.TensorShape(self.hidden_state) # return [tf.TensorShape([self.recurrent_units_2])]


    def get_initial_state(self, inputs=None, batch_size=None, dtype=None): 
        return (tf.zeros((32, self.hidden_state,self.hidden_state, 1)),
                tf.zeros((32, self.cell_state, self.cell_state, 1)))
        


    def call(self, input, states):

      # unpack the states
      hidden_s, cell_s = states

      #concatenate the state
      concat_inp = tf.concat([input, hidden_s], axis=1)
      
      # apply first recurrent kernel        
      forget_gate = self.layer_sigmoid1(concat_inp)
      new_cell_state = tf.math.multiply(forget_gate, cell_s)

      input_gate = self.layer_sigmoid2(concat_inp)
      c_gate = self.layer_tanh(concat_inp)
      product = tf.math.multiply(c_gate,input_gate)
      new_cell_state = product + new_cell_state

      output_gate = self.layer_sigmoid3(concat_inp)
      x = tf.math.tanh(new_cell_state)
      new_hidden_state = tf.math.multiply(x,output_gate)
      
      new_hidden_state = tf.keras.layers.TimeDistributed(self.global_pool)(new_hidden_state)

      new_cell_state = tf.keras.layers.TimeDistributed(self.global_pool)(new_cell_state)

      
      # return output and the list of new states of the layers
      return new_hidden_state, [new_hidden_state, new_cell_state]   


    def get_config(self):
        return {"hidden state": self.hidden_state, 
                "cell state": self.cell_state,
                "number of units": self.num_units}

In [4]:
class LSTMModel(tf.keras.Model):
    def __init__(self):
        super().__init__()

        self.convlayer1 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu')
        self.convlayer2 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu')
        self.pooling = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)

        self.convlayer3 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')
        self.convlayer4 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')
        self.global_pool = tf.keras.layers.GlobalAvgPool2D()

        #print(tf.shape(self.global_pool))
        self.lstm_cell = LSTMCell(num_units=28, dtype=tf.float32)
        
        # return_sequences collects and returns the output of the lstm_cell for all time-steps
        # unroll unrolls the network for speed (at the cost of memory)
        self.lstm_layer = tf.keras.layers.RNN(self.lstm_cell, return_sequences=True, unroll=True) ###F return_seq -> True; RNN wrapper
        
        self.output_layer = tf.keras.layers.Dense(1, activation="relu") ###F
        
        self.metrics_list = [tf.keras.metrics.Mean(name="loss"),
                             tf.keras.metrics.BinaryAccuracy()]

    
    @property
    def metrics(self):
        return self.metrics_list
    
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_state()
        
    def call(self, x, training=False):

      x = self.convlayer1(x)
      x = self.convlayer2(x)
      x = self.pooling(x)
      x = self.convlayer3(x)
      x = self.convlayer4(x)
      x = self.global_pool(x)
      
      x = self.lstm_layer(x)
      
      return self.output_layer(x)
    
    def train_step(self, data):
        
        """
        Standard train_step method, assuming we use model.compile(optimizer, loss, ...)
        """
        
        sequence, label = data
        with tf.GradientTape() as tape:
            output = self(sequence, training=True)
            loss = self.compiled_loss(label, output, regularization_losses=self.losses)
        gradients = tape.gradient(loss, self.trainable_variables)
        
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        self.metrics[0].update_state(loss)
        self.metrics[1].update_state(label, output)
        
        return {m.name : m.result() for m in self.metrics}
    
    def test_step(self, data):
        
        """
        Standard test_step method, assuming we use model.compile(optimizer, loss, ...)
        """
        
        sequence, label = data
        #print(sequence)
        output = self(sequence, training=False)
        loss = self.compiled_loss(label, output, regularization_losses=self.losses)
                
        self.metrics[0].update_state(loss)
        self.metrics[1].update_state(label, output)
        
        return {m.name : m.result() for m in self.metrics}

In [6]:
# TASK 4 - Training the networks


def training():
  
  # instantiate the model
  ourmodel = LSTMModel()

  
  optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
  loss = tf.keras.losses.MeanSquaredError()

  # compile the model (here, adding a loss function and an optimizer)
  ourmodel.compile(optimizer = optimizer, loss=loss)
  
  # create datasets
  train_ds = mnist[0]
  test_ds = mnist[1]

  train_ds = preprocess_data(train_ds, batch_size=32, sequence_length =20) #train_ds.apply(preprocess)
  test_ds = preprocess_data(test_ds, batch_size=32, sequence_length =20) #val_ds.apply(preprocess)

  # internal training loop function
  def training_loop(model, train_ds, test_ds, epochs, save_path=False): 

    #save_path = save_path

    for epoch in range(epochs):
        print(f"Epoch {epoch}:")
        print('Loop 1')
        # Validation:
        # (we do the validation first so that we get the accuracy and loss before training the network)
        for data in test_ds:
           print('Loop 2')
           metrics = model.test_step(data)
        
        print([f"test_{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # reset all metrics
        model.reset_metrics()    
        
        # Training:
        for data in train_ds:
           print('Loop 1')
           metrics = model.train_step(data)

        # print the metrics
        print([f"train_{key}: {value.numpy()}" for (key, value) in metrics.items()])


        # reset all metrics
        model.reset_metrics()
        print("\n")

    #save weights
    #if save_path:
    #    model.save_weights(save_path)
  
  training_loop(ourmodel, train_ds, test_ds, 11)#, f"logs/{str(opt)}/weights")
  print('end')
  return

training()


alternating_target_numbers 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[1;30;43mDie letzten 5000 Zeilen der Streamingausgabe wurden abgeschnitten.[0m
       -9,  9, -9])>, <tf.Tensor: shape=(20,), dtype=int64, numpy=
array([ 6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6,
       -6,  6, -6])>, <tf.Tensor: shape=(20,), dtype=int64, numpy=
array([ 6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6,
       -6,  6, -6])>, <tf.Tensor: shape=(20,), dtype=int64, numpy=
array([ 5, -5,  5, -5,  5, -5,  5, -5,  5, -5,  5, -5,  5, -5,  5, -5,  5,
       -5,  5, -5])>, <tf.Tensor: shape=(20,), dtype=int64, numpy=
array([ 6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6, -6,  6,
       -6,  6, -6])>, <tf.Tensor: shape=(20,), dtype=int64, numpy=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])>, <tf.Tensor: shape=(20,), dtype=int64, numpy=
array([ 3, -3,  3, -3,  3, -3,  3, -3,  3, -3,  3, -3,  3, -3,  3, -3,  3,
       -3,  3, -3])>, <tf.Tensor: shape=(20,), dtype=int64, numpy=
array([ 7, -7,  7, -7,  7, -

ValueError: ignored

In [None]:
EXPERIMENT_NAME = "lstm_noise"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logging_callback = tf.keras.callbacks.TensorBoard(log_dir=f"./logs/{EXPERIMENT_NAME}/{current_time}")

In [None]:
history = model.fit(train_ds,
                    validation_data=val_ds,
                    initial_epoch=25,
                    epochs=50,
                    callbacks=[logging_callback])

In [None]:
# save the complete model (incl. optimizer state, loss function, metrics etc.)
# ideally save to google drive if you're using colab
model.save("saved_model")

In [None]:
# load the model and resume training where we had to stop
loaded_model = tf.keras.models.load_model("saved_model", custom_objects={"LSTMCell": LSTMCell,
                                                                         "LSTMModel": LSTMModel})