**Libraries and Modules**

In [1]:
import numpy as np

%tensorflow_version 2.x
import tensorflow as tf
import tensorflow_datasets as tfds

import matplotlib.pyplot as plt

**Task 1: Data set**


In [2]:
# Description: Converts a string tensor holding a nucleotide sequence into a flat one-hot-encoded float tensor.
#              Each letter is first replaced by a digit (A -> 1, C -> 2, G -> 3, T -> 0), the string is split into
#              single bytes, the bytes are parsed as numbers and one-hot encoded with depth 4. The result is flattened,
#              so a sequence of n letters yields 4 * n values (e.g. 250 letters -> 1000 input neurons).
#              Presumably the input only contains the letters A, C, G and T - verify against the dataset.
#              @parameters: tensor
#              @return: onehot
def onehotify(tensor):
  nucleotide_to_digit = {'A':'1', 'C': '2', 'G':'3', 'T':'0'}
  digits = tensor
  for letter, digit in nucleotide_to_digit.items():
    digits = tf.strings.regex_replace(digits, letter, digit)
  # One byte per letter -> one class index per letter.
  class_indices = tf.strings.to_number(tf.strings.bytes_split(digits))
  class_indices = tf.cast(class_indices, tf.uint8)
  # Flatten the (letters, 4) one-hot matrix into a single vector.
  return tf.reshape(tf.one_hot(class_indices, 4), (-1,))

In [3]:
# Description: Defines the batch size and loads the first 100000 training examples and the first 1000 test examples of the
#              'genomics_ood' dataset as two separate tensorflow datasets. With as_supervised = True each element is a
#              2-tuple (input, target).
#              Each split is mapped twice: once to the one-hot-encoded inputs (onehotify) and once to the one-hot-encoded
#              targets (depth 10). The two resulting datasets are zipped back into (input, target) pairs.
#              Finally the zipped datasets are batched and prefetched (2 batches), so that upcoming batches can be prepared
#              while the current one is being processed by the network.

batch_size = 100

training_data, test_data = tfds.load('genomics_ood', split = ['train[:100000]', 'test[:1000]'], as_supervised = True)


# Keeps only the input of an (input, target) pair, one-hot encoded.
def _encode_inputs(inputs, targets):
  return onehotify(inputs)


# Keeps only the target of an (input, target) pair, one-hot encoded with depth 10.
def _encode_targets(inputs, targets):
  return tf.one_hot(targets, 10)


training_dataset_inputs = training_data.map(_encode_inputs)
training_dataset_targets = training_data.map(_encode_targets)

test_dataset_inputs = test_data.map(_encode_inputs)
test_dataset_targets = test_data.map(_encode_targets)


training_dataset = (
    tf.data.Dataset.zip((training_dataset_inputs, training_dataset_targets))
    .batch(batch_size)
    .prefetch(2)
)

test_dataset = (
    tf.data.Dataset.zip((test_dataset_inputs, test_dataset_targets))
    .batch(batch_size)
    .prefetch(2)
)

**Task 2: Model**

In [4]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Layer

# Description: The class Model describes a multi-layer perceptron with two hidden layers (256 units each, sigmoid activation)
#              and one output layer (10 units, softmax activation).
#              @init parameters: -
#              @class variables: -
#              @object variables: hidden_layer_1, hidden_layer_2, output_layer
#              @functions: call
class Model(tf.keras.Model):

    def __init__(self):
        super().__init__()
        sigmoid = tf.keras.activations.sigmoid
        self.hidden_layer_1 = tf.keras.layers.Dense(units=256, activation=sigmoid)
        self.hidden_layer_2 = tf.keras.layers.Dense(units=256, activation=sigmoid)
        self.output_layer = tf.keras.layers.Dense(units=10, activation=tf.keras.activations.softmax)

    # Description: Conducts one forward step of the model.
    #              The input is passed through the first hidden layer, then the second hidden layer and finally the output
    #              layer, whose activation is the overall prediction of the model.
    #              The function is decorated with tf.function to allow the use of the graph structure of tensorflow.
    #              @parameters: input
    #              @return: prediction
    @tf.function
    def call(self, input):
        hidden = self.hidden_layer_1(input)
        hidden = self.hidden_layer_2(hidden)
        return self.output_layer(hidden)

**Task 3: Training**

In [5]:
# Description: Trains the network of a Model for one pass over training_data. For every batch it conducts a forward step
#              inside a gradient tape, computes the loss with the loss function (loss_fn), derives the gradients with respect
#              to the trainable variables and lets the optimizer apply them.
#              For visualization it returns the mean of the per-batch losses and the accuracy over all seen samples.
#              The accuracy is accumulated as (correct predictions / seen samples), so batches of different size
#              (e.g. a smaller last batch) are handled correctly.
#              Targets and predictions are expected to be 2-dimensional (batch, classes), since argmax is taken along axis 1.
#              If training_data yields no batch, both return values are nan.
#              @parameter: mlp, training_data, loss_fn, optimizer
#              @return: training_loss, training_accuracy
def training_step(mlp, training_data, loss_fn, optimizer):
  training_losses = []
  correct_predictions = 0
  seen_samples = 0

  for (inputs, target) in training_data:
    # Only the forward pass and the loss need to be recorded by the tape.
    with tf.GradientTape() as tape:
      prediction = mlp(inputs)
      current_training_loss = loss_fn(target, prediction)
    gradients = tape.gradient(current_training_loss, mlp.trainable_variables)
    optimizer.apply_gradients(zip(gradients, mlp.trainable_variables))

    training_losses.append(current_training_loss)

    hits = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)
    correct_predictions += int(np.sum(hits))
    seen_samples += hits.size

  training_loss = np.mean(training_losses) if training_losses else float('nan')
  training_accuracy = correct_predictions / seen_samples if seen_samples else float('nan')
  return training_loss, training_accuracy


# Description: This function is used to test the network. For visualization it calculates the test loss and accuracy. 
#              The loss is calculated using the loss function (loss_fn)
#              @parameter: test_data, loss_fn
#              @return: test_loss, test_accuracy
def test(mlp, test_data, loss_fn):
  test_losses = []
  test_accuracies = []

  for (input, target) in test_data:
    prediction = mlp(input)
    
    current_test_loss = loss_fn(target, prediction)
    test_losses.append(current_test_loss)

    current_test_accuracy = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)
    test_accuracies.append(current_test_accuracy)   
    
  test_loss = np.mean(test_losses)
  test_accuracy = np.mean(test_accuracies)
  return test_loss, test_accuracy

In [6]:
# Description: This part creates the Model and executes the training and testing of the model in the training and test loop.
#              The training takes place over a number of epochs (n_epochs) with a defined learning rate. The loss function
#              defines how the loss is calculated. The optimizer applies the gradients in the training steps.
#              Moreover, lists for the visualization of the training and test loss and accuracy are initialized and
#              filled with one value per epoch.
tf.keras.backend.clear_session()

mlp = Model()
n_epochs = 10
learning_rate = 0.1
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate)    # SGD = Stochastic Gradient Descent

training_losses = []
training_accuracies = []
test_losses = []
test_accuracies = []

# The shuffle stage is added exactly once and under a new name. Dataset.shuffle reshuffles on every iteration by
# default, so each epoch still sees a new order. Re-assigning training_dataset inside the loop would stack one more
# shuffle stage per epoch and make this cell non-idempotent.
# Note: training_dataset is already batched, so this shuffles whole batches (buffer of batch_size batches).
shuffled_training_dataset = training_dataset.shuffle(buffer_size = batch_size)

# Training and Test loop
for epoch in range(n_epochs):
    print('Epoch ' + str(epoch))

    training_loss, training_accuracy = training_step(mlp, shuffled_training_dataset, loss_fn, optimizer)
    training_losses.append(training_loss)
    training_accuracies.append(training_accuracy)

    # The test data is not shuffled: the averaged test metrics do not depend on the batch order.
    test_loss, test_accuracy = test(mlp, test_dataset, loss_fn)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)

Epoch 0


ValueError: ignored

**Task 4: Visualization**

In [None]:
# Description: Figure 1 shows the loss for training and testing of the Model.
#              Figure 2 shows the accuracy for training and testing of the Model.
#              Both lists hold one value per epoch, therefore the x-axis is labelled in epochs.


# Draws one figure with a training curve and a test curve and the given y-axis label.
def _plot_training_vs_test(training_values, test_values, ylabel):
    fig, ax = plt.subplots()
    line1, = ax.plot(training_values)
    line2, = ax.plot(test_values)
    ax.set_xlabel("Epochs")
    ax.set_ylabel(ylabel)
    ax.legend((line1, line2),("Training", "Test"))
    plt.show()


_plot_training_vs_test(training_losses, test_losses, "Loss")
_plot_training_vs_test(training_accuracies, test_accuracies, "Accuracy")