In [16]:
import tensorflow as tf
import tensorflow_datasets as tfds
import datetime
import numpy as np

%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [17]:
a = lambda x1, x2: int(x1 + x2 >= 5)
b = lambda x1, x2: x1 - x2

In [19]:
# 1. get mnist from tensorflow_datasets
#mnist = tfds.load("mnist", split =["train","test"], as_supervised=True)
#train_ds = mnist[0]
#val_ds = mnist[1]

# 2. write function to create the dataset that we want
def preprocess(data, batch_size, target_function, depth):
    '''
    Preprocesses the data so that it has two input images and one target.

            Parameters:
                    data (dict): dictionary of vectors and integers
                    batch_size (int): decimal integer
                    target_function : lambda function
                    depth (int): decimal integer

            Returns:
                    data set with sets of 2 normalized image vectors and a corresponding
                    one-hot target vector
    '''
    # image should be float
    data = data.map(lambda x, t: (tf.cast(x, float), t))
    # image should be flattened
    data = data.map(lambda x, t: (tf.reshape(x, (-1,)), t))
    # image vector will here have values between -1 and 1
    data = data.map(lambda x,t: ((x/128.)-1., t))
    # we want to have two mnist images in each example
    # this leads to a single example being ((x1,y1),(x2,y2))
    zipped_ds = tf.data.Dataset.zip((data.shuffle(2000), 
                                     data.shuffle(2000)))
    #for subtask a: map ((x1,y1),(x2,y2)) to (x1,x2, y1+y2 >= 5*) *boolean
    #for loss use binary cross entropy and for activation function sigmoid
    #for subtask b: output layer is vector for values betweeen -9 and +9 -> turn into one_hot_vector
    #for loss use categorical cross entropy and for activation function softmax
    zipped_ds = zipped_ds.map(lambda x1, x2: (x1[0], x2[0], target_function(x1[1], x2[1])))
    zipped_ds = zipped_ds.map(lambda x1, x2, target: (x1, x2, tf.one_hot(target, depth=depth)))
    # batch the dataset
    zipped_ds = zipped_ds.batch(batch_size)
    # prefetch
    zipped_ds = zipped_ds.prefetch(tf.data.AUTOTUNE)
    return zipped_ds

#train_ds = preprocess(train_ds, batch_size=32, target_function=a, depth=1) #train_ds.apply(preprocess)
#val_ds = preprocess(val_ds, batch_size=32, target_function=a, depth=1) #val_ds.apply(preprocess)



In [20]:
# check the contents of the dataset
for img1, img2, label in train_ds.take(1):
    print(img1.shape, img2.shape, label.shape)

2022-11-27 21:56:02.313549: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


ValueError: not enough values to unpack (expected 3, got 2)

In [21]:


class TwinMNISTModel(tf.keras.Model):

    # 1. constructor
    def __init__(self, hidden_activation, output_activation, optimizer, loss_function):

        '''
        Initializes the Model with the metrics and layers.

                Parameters:
                        hidden_activation (func): activation function for hidden layers
                        output_activation (func): 
                        b (int): Another decimal integer

                Returns:
                        binary_sum (str): Binary string of the sum of a and b
        '''
        super().__init__()
        # inherit functionality from parent class

        # optimizer, loss function and metrics
        self.metrics_list = [tf.keras.metrics.BinaryAccuracy(),
                             tf.keras.metrics.Mean(name="loss")]
        
        self.optimizer = optimizer
        
        self.loss_function = loss_function
        
        # layers to be used
        self.dense1 = tf.keras.layers.Dense(32, activation=hidden_activation) #a: ? b: ?
        self.dense2 = tf.keras.layers.Dense(32, activation=hidden_activation)
        
        self.dense3 = tf.keras.layers.Dense(32, activation=hidden_activation)
        
        self.out_layer = tf.keras.layers.Dense(1,activation=output_activation) #a: sigmoid b: softmax
        
        
        
        
    # 2. call method (forward computation)
    def call(self, images, training=False):
        img1, img2 = images
        
        img1_x = self.dense1(img1)
        img1_x = self.dense2(img1_x)
        
        img2_x = self.dense1(img2)
        img2_x = self.dense2(img2_x)
        
        combined_x = tf.concat([img1_x, img2_x ], axis=1)
        combined_x = self.dense3(combined_x)
        
        return self.out_layer(combined_x)



    # 3. metrics property
    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model



    # 4. reset all metrics objects
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()



    # 5. train step method
    @tf.function
    def train_step(self, data):
        img1, img2, label = data
        
        with tf.GradientTape() as tape:
            output = self((img1, img2), training=True)
            loss = self.loss_function(label, output)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update the state of the metrics according to loss
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        # return a dictionary with metric names as keys and metric results as values
        return {m.name : m.result() for m in self.metrics}



    # 6. test_step method
    @tf.function
    def test_step(self, data):
        img1, img2, label = data
        # same as train step (without parameter updates)
        output = self((img1, img2), training=False)
        loss = self.loss_function(label, output)
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        return {m.name : m.result() for m in self.metrics}



In [22]:
def create_summary_writers(config_name):
    
    # Define where to save the logs
    # along with this, you may want to save a config file with the same name so you know what the hyperparameters were used
    # alternatively make a copy of the code that is used for later reference
    
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    train_log_path = f"logs/{config_name}/{current_time}/train"
    val_log_path = f"logs/{config_name}/{current_time}/val"

    # log writer for training metrics
    train_summary_writer = tf.summary.create_file_writer(train_log_path)

    # log writer for validation metrics
    val_summary_writer = tf.summary.create_file_writer(val_log_path)
    
    return train_summary_writer, val_summary_writer

train_summary_writer, val_summary_writer = create_summary_writers(config_name="RUN1")

In [36]:
import tqdm
def training_loop(model, train_ds, val_ds, start_epoch,
                  epochs, train_summary_writer, 
                  val_summary_writer, save_path):

    # 1. iterate over epochs
    for e in range(start_epoch, epochs):

        # 2. train steps on all batches in the training data
        for data in tqdm.tqdm(train_ds, position=0, leave=True):
            metrics = model.train_step(data)

        # 3. log and print training metrics

        with train_summary_writer.as_default():
            # for scalar metrics:
            for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=e)
            # alternatively, log metrics individually (allows for non-scalar metrics such as tf.keras.metrics.MeanTensor)
            # e.g. tf.summary.image(name="mean_activation_layer3", data = metrics["mean_activation_layer3"],step=e)
        
        #print the metrics
        print([f"{key}: {value.numpy()}" for (key, value) in metrics.items()])
        
        # 4. reset metric objects
        model.reset_metrics()


        # 5. evaluate on validation data
        for data in val_ds:
            metrics = model.test_step(data)
        

        # 6. log validation metrics

        with val_summary_writer.as_default():
            # for scalar metrics:
            for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=e)
            # alternatively, log metrics individually (allows for non-scalar metrics such as tf.keras.metrics.MeanTensor)
            # e.g. tf.summary.image(name="mean_activation_layer3", data = metrics["mean_activation_layer3"],step=e)
            
        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])
        # 7. reset metric objects
        model.reset_metrics()
        
    # 8. save model weights if save_path is given
    if save_path:
        model.save_weights(save_path)

In [37]:
# open the tensorboard logs
%tensorboard --logdir logs/

Reusing TensorBoard on port 6006 (pid 3114), started 0:01:40 ago. (Use '!kill 3114' to kill it.)

In [38]:
# Initialize the functions for model.
def subtask(option):
    # 1. get mnist from tensorflow_datasets
    mnist = tfds.load("mnist", split =["train","test"], as_supervised=True)
    train_ds = mnist[0]
    val_ds = mnist[1]

    if option == "a":
        hidden_activation = tf.nn.relu
        output_activation = tf.nn.sigmoid
        optimizer = tf.keras.optimizers.SGD(0.001)
        loss_function = tf.keras.losses.BinaryCrossentropy()
        train_ds = preprocess(train_ds, batch_size=32, target_function=a, depth=1) #train_ds.apply(preprocess)
        val_ds = preprocess(val_ds, batch_size=32, target_function=a, depth=1) #val_ds.apply(preprocess)
    if option == "b":
        hidden_activation = tf.nn.relu
        output_activation = tf.nn.softmax
        optimizer = tf.keras.optimizers.SGD(0.001)
        loss_function = tf.keras.losses.CategoricalCrossentropy()
        train_ds = preprocess(train_ds, batch_size=32, target_function=b, depth=19) #train_ds.apply(preprocess)
        val_ds = preprocess(val_ds, batch_size=32, target_function=b, depth=19) #val_ds.apply(preprocess)
    
    model = TwinMNISTModel(hidden_activation, output_activation, optimizer, loss_function)
    
    save_path = "trained_model_RUN1"
    
    training_loop(model=model,
    train_ds=train_ds,
    val_ds=val_ds,
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer,
    val_summary_writer=val_summary_writer,
    save_path=save_path)


#initialize model with corresponding functions
#model_a = TwinMNISTModel(hidden_activation, output_activation, optimizer, loss_function)# 1. instantiate model

# 2. choose a path to save the weights
#save_path = "trained_model_RUN1"

# 2. pass arguments to training loop function

'''training_loop(model=model_a,
    train_ds=train_ds,
    val_ds=val_ds,
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer,
    val_summary_writer=val_summary_writer,
    save_path=save_path)'''
subtask("b")

  0%|                                                                                                                                                                                                             | 0/1875 [00:00<?, ?it/s]2022-11-27 22:15:07.121343: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2022-11-27 22:15:07.123977: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cac

ValueError: in user code:

    File "/tmp/ipykernel_1843/3477403046.py", line 78, in train_step  *
        loss = self.loss_function(label, output)
    File "/home/imogen/miniconda3/envs/iannwtf/lib/python3.7/site-packages/keras/losses.py", line 139, in __call__  **
        losses = call_fn(y_true, y_pred)
    File "/home/imogen/miniconda3/envs/iannwtf/lib/python3.7/site-packages/keras/losses.py", line 243, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/home/imogen/miniconda3/envs/iannwtf/lib/python3.7/site-packages/keras/losses.py", line 1788, in categorical_crossentropy
        y_true, y_pred, from_logits=from_logits, axis=axis)
    File "/home/imogen/miniconda3/envs/iannwtf/lib/python3.7/site-packages/keras/backend.py", line 5119, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (32, 19) and (32, 1) are incompatible


In [None]:
tf.keras.backend.clear_session()

#For showcasing we only use a subset of the training and test data (generally use all of the available data!)
#train_dataset = train_dataset.take(1000)
#test_dataset = test_dataset.take(100)

### Hyperparameters
num_epochs = 10
learning_rate = 0.001

# Initialize the functions for model.
hidden_activation = tf.nn.relu
output_activation = tf.nn.sigmoid
optimizer = tf.keras.optimizers.SGD(learning_rate)
loss_function = tf.keras.losses.BinaryCrossentropy()

#initialize model with corresponding functions
model_a = TwinMNISTModel(hidden_activation, output_activation, optimizer, loss_function)

# Initialize lists for later visualization.
train_losses = []
test_losses = []
test_accuracies = []

#testing once before we begin
test_loss, test_accuracy = model_a.test(val_ds)
test_losses.append(test_loss)
test_accuracies.append(test_accuracy)

#check how model performs on train data once before we begin
train_loss, _ = model_a.test(val_ds) 
train_losses.append(train_loss)

# We train for num_epochs epochs.
for epoch in range(num_epochs):
    print(f'Epoch: {str(epoch)} starting with accuracy {test_accuracies[-1]}')

    #training (and checking in with training)
    epoch_loss_agg = []
    for data in train_ds:
        train_loss = model_a.train_step(data)
        epoch_loss_agg.append(train_loss)
    
    #track training loss
    train_losses.append(tf.reduce_mean(epoch_loss_agg))

    #testing, so we can track accuracy and test loss
    test_loss, test_accuracy = model_a.test(val_ds)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)