In [55]:
from tensorflow.keras.applications import EfficientNetB2
from tensorflow.keras.layers import Input
from tensorflow.keras import layers
from tensorflow.keras.models import Model
import tensorflow as tf
from tensorflow import keras
import json
import help_functions as hf
import numpy as np
import pandas as pd
import time

In [2]:
# Cap the number of threads TensorFlow uses within a single op and across independent ops.
tf.config.threading.set_intra_op_parallelism_threads(10)
tf.config.threading.set_inter_op_parallelism_threads(10)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to a single GPU: the second one (index 1) when it exists,
    # otherwise the only one available (indexing gpus[1] directly would raise IndexError there).
    selected_gpu = gpus[min(1, len(gpus) - 1)]
    try:
        tf.config.set_visible_devices(selected_gpu, 'GPU')
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Report the error instead of stopping the notebook.
        print(e)

2 Physical GPUs, 1 Logical GPU


In [26]:
# train_output = (1 - labels) * get_constr_out(output, M) + labels * get_constr_out(labels * output, M)

# TODO: add an MCM (maximum constraint module) layer here. The hierarchy constraint is expressed by the matrix R.
# Is the MCM layer only used at inference? No: it is also applied during training (see the train_output formula above).

In [51]:
# Read the settings stored under key "1" of the training-configurations file.
with open('training_configurations.json', 'r') as fp:
    config = json.load(fp)['1']

# Override two of the loaded settings for this experiment.
config.update({'batch_size': 1, 'nr_classes': 3})

# Build the data flow from the test dataframe; the remaining arguments come straight from the config.
flow_settings = {key: config[key] for key in ('nr_classes', 'batch_size', 'image_dimension')}
test = hf.get_flow(df_file=config['data_folder'] + '/test_df.json.bz2', **flow_settings)

Found 50000 non-validated image filenames belonging to 40 classes.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_x_labels['labels'] = df['labels'].apply(lambda labels_list: [label for label in labels_list if label in top_classes])


Found 37088 validated image filenames belonging to 3 classes.


In [None]:
class CustomModel(keras.Model):
    """Keras model whose training step uses a hard-wired binary cross-entropy loss.

    The loss is computed inside `train_step`, so only the optimizer and the
    metrics configured through `compile` are used by this step.
    """

    def train_step(self, data):
        """Run one optimisation step on a single batch.

        Args:
            data: an `(x, y)` pair (inputs, targets).

        Returns:
            A dict with the batch loss under "loss" plus the current value of
            every metric in `self.metrics`, keyed by metric name.
        """
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # forward pass
            # Predictions are treated as probabilities, not logits.
            bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
            loss = bce(y, y_pred)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update the metrics configured in compile()
        self.compiled_metrics.update_state(y, y_pred)
        # The loss is computed by hand above rather than through the compiled loss,
        # so it is reported explicitly; a metric named "loss", if any, takes precedence.
        logs = {'loss': loss}
        logs.update({m.name: m.result() for m in self.metrics})
        return logs

def get_uncompiled_model(nr_classes=20, image_dimension=32):
    """Build an uncompiled `CustomModel`: EfficientNetB2 backbone plus a small dense head.

    Args:
        nr_classes: number of sigmoid output units (one per class). Defaults to 20.
        image_dimension: height and width of the square RGB input. Defaults to 32.

    Returns:
        An uncompiled `CustomModel` mapping images of shape
        (image_dimension, image_dimension, 3) to `nr_classes` sigmoid outputs.
    """
    input_shape = (image_dimension, image_dimension, 3)
    # Backbone without its classification head, randomly initialised (weights=None).
    # NOTE(review): per the Keras docs `classes` only matters when include_top=True — confirm.
    basemodel = EfficientNetB2(include_top=False, weights=None, classes=nr_classes, input_shape=input_shape)
    inputs = Input(shape=input_shape)
    x = basemodel(inputs)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation='relu')(x)
    # Independent sigmoid per class (multi-label output).
    outputs = layers.Dense(nr_classes, activation='sigmoid')(x)
    return CustomModel(inputs=inputs, outputs=outputs)

def get_compiled_model():
    """Return the model from `get_uncompiled_model`, compiled with Adam and an accuracy metric.

    No loss is passed to `compile`.
    """
    compiled = get_uncompiled_model()
    adam = tf.keras.optimizers.Adam()
    compiled.compile(metrics=['accuracy'], optimizer=adam)
    return compiled

# model = get_compiled_model()
# model.fit(test, verbose=1, epochs=1)

In [62]:
# EfficientNetB2 backbone without its classification head, randomly initialised (weights=None).
basemodel = EfficientNetB2(
    include_top=False,
    weights=None,
    classes=config['nr_classes'],
    input_shape=(config['image_dimension'],) * 2 + (3,),
)
inputs = Input(shape=(config['image_dimension'],) * 2 + (3,))
# Head: flatten -> 128-unit ReLU layer -> one sigmoid output per class.
x = layers.Dense(128, activation='relu')(layers.Flatten()(basemodel(inputs)))
outputs = layers.Dense(config['nr_classes'], activation='sigmoid')(x)
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['categorical_accuracy'],
)
# Stand-alone accuracy metric object, kept separate from the compiled metrics.
metrics = tf.metrics.CategoricalAccuracy()

In [63]:
# M_ij = 1 if class i is a subclass of class j, i.e. row i, column j is 1.
# - for i != j, M_ij = 1 implies M_ji = 0, because the subclass relation is antisymmetric
# - the diagonal is 1: every class is treated as a subclass of itself
# Here class 1 (index 0) is the parent of the two other classes:
#   [[1, 0, 0],
#    [1, 1, 0],
#    [1, 0, 1]]
mask = np.eye(3, dtype=np.float32)
mask[1:, 0] = 1
# Transpose so that row i marks class i together with each of its subclasses.
# (Checked by looking at the results with only 2 or 3 classes.)
mask = mask.T


In [None]:
def max_constrain(output, mask):
    """Constrain the output given the hierarchy expressed by the mask.

    For every sample, entry i of the result is max_j(output[j] * mask[i, j]).

    Args:
        output: array or tensor indexed as (batch, class).
        mask: square (class, class) matrix; cast to the dtype of `output`.

    Returns:
        Tensor indexed as (batch, class) holding the constrained output.
    """
    output = tf.convert_to_tensor(output)
    mask = tf.cast(mask, output.dtype)

    # (batch, 1, class) * (class, class) broadcasts to (batch, class, class);
    # this is H in the MCM equation, already multiplied by the mask.
    # Relying on broadcasting avoids len(output), which is undefined for symbolic tensors.
    masked_output = tf.expand_dims(output, axis=1) * mask  # expand_dims == PyTorch unsqueeze()

    return tf.math.reduce_max(masked_output, axis=2)

@tf.function   # to speed up: https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch#speeding-up_your_training_step_with_tffunction
def train_step(inputs, labels):
    """Run one training step of the coherent hierarchical multi-label model.

    Uses the module-level `model`, `mask` and `metrics`: the model output is
    passed through the max constraint module before being handed to the
    compiled loss, the weights are updated, and `metrics` is updated with the
    constrained probabilities.

    Returns:
        The loss value of this batch.
    """
    with tf.GradientTape() as tape:
        probs = model(inputs, training=True)

        # Extra steps for coherent HMC:
        # 1. max constraint module, once on the raw probabilities and once on
        #    the probabilities restricted to the labelled classes
        constrained_all = max_constrain(probs, mask)
        constrained_labelled = max_constrain(labels * probs, mask)
        probs_constrained = (1 - labels) * constrained_all + labels * constrained_labelled

        # 2. the loss receives the constrained probabilities instead of the raw ones
        loss = model.compiled_loss(labels, probs_constrained)

    weights = model.trainable_weights
    model.optimizer.apply_gradients(zip(tape.gradient(loss, weights), weights))
    metrics.update_state(labels, probs_constrained)
    return loss
    
epochs = 1

# NOTE(review): `test` comes from hf.get_flow; if it is a Keras image iterator (TODO confirm)
# it yields batches forever, so an epoch must be ended by hand after len(test) batches.
# Iterables without a known length are simply consumed until they stop on their own.
try:
    steps_per_epoch = len(test)
except TypeError:
    steps_per_epoch = None

for epoch in range(epochs):
    print(f'Start of epoch {epoch}\n')
    end = time.time()
    # Iterate over the batches of the dataset
    for step, (x_batch_train, y_batch_train) in enumerate(test):
        loss_value = train_step(x_batch_train, y_batch_train)

        # Log every 300 batches.
        if step % 300 == 0:
            print(f"Training loss (for one batch) at step {step}: {float(loss_value):.4f}")
            print(f"Seen so far: {(step + 1) * config['batch_size']} samples")
            temp_end = time.time()
            print(f'time: {temp_end-end}')
            end = time.time()

        # Stop once every batch of the epoch has been seen.
        if steps_per_epoch is not None and step + 1 >= steps_per_epoch:
            break

    # Display metrics at the end of each epoch.
    train_acc = metrics.result()
    print(f"Training acc over epoch: {float(train_acc)}")

    # Reset training metrics at the end of each epoch
    metrics.reset_states()

Unit test of the Maximum Constraint Module

In [None]:
# Unit test of maximum constraint module

# Class 1 is the parent of classes 2 and 3
mask = np.array([[1, 0, 0],
                 [1, 1, 0],
                 [1, 0, 1]], dtype=np.float32)
np.fill_diagonal(mask, 1)
mask = np.transpose(mask)

# (description, input probabilities, expected constrained probabilities)
mcm_cases = [
    # Case 1: the parent has greater probability than both children. Nothing changes.
    ('parent above both children', [0.7, 0.5, 0.2], [0.7, 0.5, 0.2]),
    # Case 2: the parent has smaller probability than child class 2.
    ('parent below child class 2', [0.1, 0.5, 0.2], [0.5, 0.5, 0.2]),
    # Case 3: the parent has smaller probability than child class 3.
    ('parent below child class 3', [0.1, 0.3, 0.8], [0.8, 0.3, 0.8]),
]

for description, case_probs, expected in mcm_cases:
    probs = np.array([case_probs], dtype=np.float32)
    constrained = max_constrain(probs, mask).numpy()
    # Compare the full (1, 3) result so that a wrong output shape is caught too,
    # and report which case failed together with the mismatching values.
    np.testing.assert_allclose(constrained, np.array([expected], dtype=np.float32), err_msg=description)