In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import Model
from random import randint

In [None]:
def generate_sequence(num):
  """
  Generates a random sequence of digits from 0-9 of a given length.
  num:        Number of digits to be generated
  Return:     int32 tensor of shape (num, 1) holding the drawn digits
  """
  # `maxval` is an exclusive upper bound, so 10 is required to include the digit 9.
  return tf.random.uniform(shape=(num,1), maxval=10, dtype=tf.dtypes.int32)

def more_common(sequence, queryA, queryB):
  """
  Checks which of two given digits occurs more often in a given sequence.
  sequence:   Sequence of single digits (list or tensor of any shape; it is
              flattened before counting)
  queryA:     First digit to count (int or single-element tensor, range 0-9)
  queryB:     Second digit to count (int or single-element tensor, range 0-9)
  Return:     int32 tensor of shape (1,) with the code
              0 -> both queries occur equally often
              1 -> first query occurs more often
              2 -> second query occurs more often
  """
  digits = tf.reshape(sequence, shape=(-1,))

  def _occurrences(query):
    # Reduce the query to a scalar of the sequence's dtype so that plain ints
    # and single-element tensors are handled the same way.
    query = tf.cast(tf.reshape(query, shape=()), digits.dtype)
    matches = tf.cast(tf.equal(digits, query), tf.int32)
    # A digit that never occurs simply sums to 0; no special case is needed.
    return int(tf.reduce_sum(matches))

  count_queryA = _occurrences(queryA)
  count_queryB = _occurrences(queryB)

  if count_queryA > count_queryB:
    return tf.convert_to_tensor([1])
  elif count_queryB > count_queryA:
    return tf.convert_to_tensor([2])
  else:
    # they occur equally often
    return tf.convert_to_tensor([0])

**Functionality test for sequence generation and detecting more common value given two digits.**

In [None]:
# Draw a short sample sequence and show it as the cell output.
seq = generate_sequence(10)
seq

<tf.Tensor: shape=(10, 1), dtype=int32, numpy=
array([[8],
       [3],
       [6],
       [4],
       [0],
       [3],
       [3],
       [8],
       [6],
       [3]], dtype=int32)>

In [None]:
# Compare how often the digits 9 and 1 occur in `seq`. The result is the code
# tensor returned by more_common (0 = tie, 1 = first query, 2 = second query).
result = more_common(seq, 9, 1)
result

<tf.Tensor: shape=(1,), dtype=int32, numpy=array([0], dtype=int32)>

## Create Dataset

Each dataset entry consists of a sequence of a given length and a query about two digits. The label describes whether the queried digits occur equally often (label '0') or which of the two digits occurs more often (label '1' for the first query, label '2' for the second query).

In [None]:
BATCH_SIZE = 10             # number of samples per batch
TRAIN_DATASET_LENGTH = 100  # number of samples in the training split
TEST_DATASET_LENGTH = 50    # number of samples in the test split
SEQUENCE_LENGTH = 10        # number of digits in each generated sequence

In [None]:
# Total number of (target, sequence, label) samples across train and test split
complete_dataset_length = TRAIN_DATASET_LENGTH + TEST_DATASET_LENGTH

# Create targets (two random digits per sample: the pair of digits to compare)
targets = [generate_sequence(2) for _ in range(complete_dataset_length)]

# Create sequences of length SEQUENCE_LENGTH
sequences = [generate_sequence(SEQUENCE_LENGTH) for _ in range(complete_dataset_length)]

# Create labels with more_common
# (0 = tie, 1 = first target digit occurs more often, 2 = second target digit occurs more often)
labels = [more_common(seq, target[0], target[1]) for seq, target in zip(sequences, targets)]

Now we want to create a TensorFlow dataset from that. We preprocess every sequence, target, and label into a one-hot encoding.

In [None]:
# Create dataset
def _dataset_from(rows):
  """Builds a dataset of (target, sequence, label) triples from the given slice of samples."""
  return tf.data.Dataset.from_tensor_slices((targets[rows], sequences[rows], labels[rows]))

# The first TRAIN_DATASET_LENGTH samples are used for training, the rest for testing
train_data = _dataset_from(slice(None, TRAIN_DATASET_LENGTH))
test_data = _dataset_from(slice(TRAIN_DATASET_LENGTH, None))

# Taking a look at the data
for data in train_data.take(1):
  print('Train data example:\n Target: %s\n Sequence: %s\n Label: %s\n' % tuple(data))

Train data example:
 Target: tf.Tensor(
[[0]
 [7]], shape=(2, 1), dtype=int32)
 Sequence: tf.Tensor(
[[7]
 [3]
 [3]
 [1]
 [0]
 [1]
 [4]
 [5]
 [7]
 [3]], shape=(10, 1), dtype=int32)
 Label: tf.Tensor([2], shape=(1,), dtype=int32)



In [None]:
# PREPROCESSING:

def _one_hot_example(target, sequence, label):
  """One-hot encodes one sample: digits with depth 10, the label with depth 3."""
  return tf.one_hot(target, 10), tf.one_hot(sequence, 10), tf.one_hot(label, 3)

# One hot encoding, applied identically to both splits
train_data_prep = train_data.map(_one_hot_example)
test_data_prep = test_data.map(_one_hot_example)

# Print one example after one hot encoding
for data in train_data_prep.take(1):
  print('Train data example:\n\n Target: %s\n\n Sequence: %s\n\n Label: %s\n' % tuple(data))

# Batching
train_data_prep = train_data_prep.batch(BATCH_SIZE)
test_data_prep = test_data_prep.batch(BATCH_SIZE)

Train data example:

 Target: tf.Tensor(
[[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]], shape=(2, 1, 10), dtype=float32)

 Sequence: tf.Tensor(
[[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]

 [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]

 [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]

 [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]], shape=(10, 1, 10), dtype=float32)

 Label: tf.Tensor([[0. 0. 1.]], shape=(1, 3), dtype=float32)



In [None]:
# Show the element shapes and dtypes of the batched, one-hot encoded training set.
print(train_data_prep)

<BatchDataset shapes: ((None, 2, 1, 10), (None, 10, 1, 10), (None, 1, 3)), types: (tf.float32, tf.float32, tf.float32)>


## LSTM Cell

An LSTM cell consists of:
* a cell state
* three gates (input, forget, output)
* a cell state candidate
* a hidden state

In [None]:
# Activation functions
# Short aliases for the Keras activations used by the gate and read-out layers.
SIGMOID = tf.keras.activations.sigmoid
TANH = tf.keras.activations.tanh

In [None]:
# Subclass tf.keras.Model explicitly: the bare name `Model` is redefined further
# down in this notebook, so re-running this cell could pick up the wrong base.
class LSTM_Cell(tf.keras.Model):
  """
  A single LSTM cell: three sigmoid gates (input, forget, output) and a tanh
  cell state candidate, each a dense layer applied to the concatenation of the
  previous hidden state and the current input.
  """

  def __init__(self, units):
    """
    units:    Width of the hidden state and the cell state
    """
    super().__init__()

    # TODO: use an orthogonal kernel initializer (note translated from German).

    self.units = units

    # Filters with values between zero and one, same width as the cell state
    self.input_gate = tf.keras.layers.Dense(units=self.units, activation=SIGMOID)
    # The forget gate bias starts at 1 so the cell initially keeps its state.
    self.forget_gate = tf.keras.layers.Dense(units=self.units, activation=SIGMOID, bias_initializer='ones')
    # Applied to the (tanh-squashed) new cell state
    self.output_gate = tf.keras.layers.Dense(units=self.units, activation=SIGMOID)

    # Proposed update for the cell state, same width as the cell state
    self.cell_state_candidate = tf.keras.layers.Dense(units=self.units, activation=TANH)

    # Most recent states, kept as attributes for callers that read them from
    # the cell. Their width follows `units` so they match the gate outputs.
    self.hidden_state = tf.zeros(shape=(BATCH_SIZE, 1, self.units))
    self.cell_state = tf.zeros(shape=(BATCH_SIZE, 1, self.units))

  def call(self, x, states):
    """
    Performs one LSTM step.

    x:        Input for a single timestep
    states:   Tuple containing hidden state and cell state of the previous step
    Return:   Tuple (new hidden state, new cell state)
    """
    (hidden_state, cell_state) = states

    # All gates see the previous hidden state joined with the input along the
    # feature (last) axis.
    concat_input = tf.concat([hidden_state, x], axis=-1)

    # Forget part of the previous cell state, then add the gated candidate
    retained = cell_state * self.forget_gate(concat_input)
    update = self.input_gate(concat_input) * self.cell_state_candidate(concat_input)
    new_cell_state = retained + update

    # Output is the new hidden state
    new_hidden_state = tf.nn.tanh(new_cell_state) * self.output_gate(concat_input)

    # Remember the latest states on the cell as well
    self.hidden_state = new_hidden_state
    self.cell_state = new_cell_state

    return new_hidden_state, new_cell_state

In [None]:
# Subclass tf.keras.Model explicitly: the bare name `Model` is redefined further
# down in this notebook, so re-running this cell could pick up the wrong base.
class LSTM(tf.keras.Model):
  """
  Unrolls an LSTM cell over a sequence and returns the last hidden state.
  """

  def __init__(self, units):
    """
    units:    Width of the hidden state and the cell state
    """
    super().__init__()

    self.units = units

    # We need a LSTM cell
    self.lstm_cell = LSTM_Cell(units)


  def call(self, x):
    """
    x:        Tuple (target, sequence) of batched one-hot tensors. Expected
              layout, as printed for the preprocessed dataset:
              target (batch, 2, 1, 10), sequence (batch, timesteps, 1, 10)
    Return:   Hidden state after the last timestep, shape (batch, 1, units)
    """
    target, sequence = x[0], x[1]

    # Axis 0 is the batch, axis 1 enumerates the digits of the sequence
    batch_size = tf.shape(sequence)[0]
    timesteps = sequence.shape[1]

    # Flatten both queried digits into one feature vector per sample so they
    # can be appended to every timestep's input.
    query_width = int(np.prod(list(target.shape[1:])))
    query = tf.reshape(target, (-1, 1, query_width))

    # Start every forward pass from zero states sized to the actual batch
    hidden_state = tf.zeros((batch_size, 1, self.units), dtype=sequence.dtype)
    cell_state = tf.zeros_like(hidden_state)

    # For each timestep (digit in sequence) update the LSTM cell
    for timestep in range(timesteps):

      # Network input is the current sequence element joined with the query
      # along the feature axis
      network_input = tf.concat([sequence[:, timestep], query], axis=-1)

      hidden_state, cell_state = self.lstm_cell(network_input, (hidden_state, cell_state))

    # Last hidden state is the output
    return hidden_state

In [None]:
# NOTE: this class reuses the name `Model`, which shadows the Keras class
# imported at the top of the notebook. The base is therefore spelled out as
# tf.keras.Model so that re-running this cell does not inherit from itself.
class Model(tf.keras.Model):

  def __init__(self, units=10):
    """
    The model consists of a LSTM layer and an output layer.

    units:    Width of the LSTM hidden state (default 10)
    """
    super().__init__()

    self.units = units

    # The LSTM is created once here so its weights persist across calls and
    # can be trained.
    self.lstm = LSTM(self.units)

    # Output layer
    # Readout is a dense layer with three units (one hot encoding). The three
    # labels are mutually exclusive, so softmax is used to get a distribution.
    self.read_out = tf.keras.layers.Dense(units=3, activation=tf.keras.activations.softmax)


  def call(self, x):
    """
    x:        Tuple (target, sequence), passed on unchanged to the LSTM
    Return:   Read-out of the LSTM's last hidden state (three values per sample)
    """
    last_hidden_state = self.lstm(x)

    output = self.read_out(last_hidden_state)

    return output


In [None]:
lstm_model = Model()
# NOTE(review): this line only works if Model defines a `units` attribute.
print(lstm_model.units)

# Disabled forward pass on a single sample.
# NOTE(review): `train_data` holds the raw integer tensors; the one-hot encoded,
# batched pipeline is `train_data_prep` -- confirm which one the model should get.
# for (target, seq, label) in train_data.take(1):
#   lstm_model((target, seq))

This is where we got stuck and did not know how to continue. We went to the coding support session, but we still did not really understand the purpose of the three different classes. For `LSTM_Cell` we are reasonably sure that it is roughly what it is supposed to be, but the distinction between the `LSTM` class and the `Model` class is our problem here. As a consequence, we do not really understand what the call (i.e. the task) of our network should be in the next step.