# Preliminary Steps
These are some preliminary steps before addressing the task. Import some basic libraries and set a variable that will be used in multiple steps.

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# typing
from typing import List, Callable, Dict

# Dataset

## Dataset download

## Train, validation and test splits

# GloVe 

## OOV

# Models
This section is used for creating different models, going from a baseline to slightly more complicated ones.

## Constants and utilities
First of all, define some constants, parameter dictionaries and methods that will be reused by each architecture.

In [2]:
# TODO: all the following constants are temporary placeholders
N_CLASSES = 20  # this must be equal to the number of tags
VOCABULARY_SIZE = 1000  # this must be obtained from the dataset
EMBEDDING_SIZE = 64  # hyper-parameter to properly set
MAX_SEQUENCE_SIZE = 100  # this must be obtained from the dataset

BATCH_SIZE = 128  # hyper-parameter to properly set
EPOCHS = 5


# Model common compile information
# The sparse variants of the loss and of the accuracy metric are used:
# they expect the labels as integer class ids, not as one-hot vectors.
# NOTE(review): the optimizer and metric objects stored here are created
# once, and this same dictionary is passed to every create_model call in
# the notebook -- confirm that this is intended.
model_compile_info = {
    'optimizer': keras.optimizers.Adam(learning_rate=1e-3),
    'loss': 'sparse_categorical_crossentropy',
    'metrics': [keras.metrics.SparseCategoricalAccuracy()],
}

# Model common training information
# NOTE(review): patience (10) is larger than EPOCHS (5), so with the current
# placeholder values early stopping can never trigger.
training_info = {
    'verbose': 1,
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'callbacks': [keras.callbacks.EarlyStopping(monitor='val_loss', 
                                                patience=10,
                                                restore_best_weights=True)]
}

In [3]:
# Placeholder embedding matrix, one row per vocabulary entry and one column
# per embedding dimension. It is meant to be replaced by the GloVe weights.
embedding_weights = np.zeros((VOCABULARY_SIZE, EMBEDDING_SIZE))

Define utility methods that will be used to **create**, **train** and **test** the models.

In [24]:
def create_model(name,
                 layers, 
                 compile_info, 
                 show_summary=True) -> keras.Model:
    """
    Create a sequential model from the layers passed as parameters.
    After the creation, the model is compiled and its summary is possibly 
    printed to console.

    Parameters
    ----------
    name : str
        Name given to the sequential model.
    layers : array
        Ordered list of layers that must be added to the model. Inside this
        function the parameter shadows the module-level ``layers`` import.
    compile_info: Dictionary
        Keyword arguments forwarded to ``model.compile``. The dictionary is
        never modified. If it holds an optimizer *instance*, a fresh copy
        built from its configuration is used, so that every model owns its
        optimizer.
    show_summary: bool
        If true, then the summary of the model will be printed to console

    Returns
    -------
    model : keras.Model
        The compiled keras model.

    Raises
    ------
    AssertionError
        If the last layer does not use the softmax activation or does not
        have ``N_CLASSES`` units.
    """
    layers = list(layers)

    # Sanity checks for being sure that the last layer has been
    # correctly set
    if layers:
        last_layer = layers[-1]
        assert last_layer.activation == keras.activations.softmax, 'Wrong activation function'
        assert last_layer.units == N_CLASSES, 'Wrong number of units'

    model = keras.Sequential(name=name)
    for layer in layers:
        model.add(layer)

    # Work on a shallow copy: the caller's dictionary is shared by all models.
    compile_kwargs = dict(compile_info)

    # An optimizer object keeps state tied to the variables it updates, so one
    # instance must not be reused by several models. Re-create it from its
    # configuration; string identifiers such as 'adam' are left untouched.
    optimizer = compile_kwargs.get('optimizer')
    if hasattr(optimizer, 'get_config') and hasattr(optimizer, 'from_config'):
        compile_kwargs['optimizer'] = type(optimizer).from_config(
            optimizer.get_config())

    # Compile
    model.compile(**compile_kwargs)

    # Print model summary
    if show_summary:
        model.summary()
    
    return model


def train_model(model: keras.Model,
                x_train: np.ndarray,
                y_train: np.ndarray,
                x_val: np.ndarray,
                y_val: np.ndarray,
                training_info: dict):
    """
    Fit a Keras model on the training set, validating on the validation set.
    Once fitting is over, the returned History object is displayed.

    :param model: Keras built model
    :param x_train: training data in np.ndarray format
    :param y_train: training labels in np.ndarray format
    :param x_val: validation data in np.ndarray format
    :param y_val: validation labels in np.ndarray format
    :param training_info: dictionary of extra keyword arguments for model.fit()

    :return
        model: the same Keras model, after fitting
    """
    print("Start training! \nParameters: {}".format(training_info))

    validation_pair = (x_val, y_val)
    fit_history = model.fit(x=x_train,
                            y=y_train,
                            validation_data=validation_pair,
                            **training_info)

    print("Training completed! Showing history...")

    # NOTE(review): show_history is not defined in the visible part of the
    # notebook -- it must exist before this function is called.
    show_history(fit_history)

    return model


def predict_data(model: keras.Model,
                 x: np.ndarray,
                 prediction_info: dict) -> np.ndarray:
    """
    Run inference with the model on a set of examples.

    :param model: Keras built and possibly trained model
    :param x: input set of examples in np.ndarray format
    :param prediction_info: dictionary of extra keyword arguments for model.predict()

    :return
        predictions: the output of model.predict() on x
    """
    print('Starting prediction: \n{}'.format(prediction_info))

    # The first axis of x is reported as the number of samples
    n_samples = x.shape[0]
    print('Predicting on {} samples'.format(n_samples))

    return model.predict(x, **prediction_info)


def evaluate_predictions(predictions: np.ndarray,
                         y: np.ndarray,
                         metrics: List[Callable],
                         metric_names: List[str]):
    """
    Score model predictions with every metric function of a list.

    Each metric is called with the keyword arguments ``y_pred`` and
    ``y_true``.

    :param predictions: model predictions in np.ndarray format
    :param y: ground-truth labels in np.ndarray format
    :param metrics: list of metric functions
    :param metric_names: list of metric names, paired by position with metrics

    :return
        metric_info: dictionary mapping each metric name to its value
    """
    # Names and functions are paired positionally, so the lengths must agree
    assert len(metrics) == len(metric_names)

    print("Evaluating predictions! Total samples: ", y.shape[0])

    return {
        label: score_fn(y_pred=predictions, y_true=y)
        for score_fn, label in zip(metrics, metric_names)
    }

def model_sanity_check(model: keras.Model):
    """
    Create a random batch of token ids and try to pass it through the model.
    This method should be used in order to check if the model is 
    working as expected.

    The input shapes and the output shape are printed to console.

    Parameters
    ----------
    model : keras.Model
        The model that must be tried.

    """
    print(f'Sanity check for the model with name: {model.name}')
    # The models of this notebook start with an embedding lookup, so the input
    # must be integer token ids. Uniform floats in [0, 1) would all collapse
    # to id 0, which is the padding id masked by the embedding layer.
    input_tensor = np.random.randint(low=0,
                                     high=VOCABULARY_SIZE,
                                     size=(BATCH_SIZE, MAX_SEQUENCE_SIZE))
    print(f'Input tensor shape: {input_tensor.shape}')
    output_tensor = model(input_tensor)
    print(f'Output tensor shape: {output_tensor.shape}')

Define utility methods for **creating layers** in order to: 
* reduce the code verbosity.
* be sure to always create different architectures with the same layer structures.

In [23]:
# EMBEDDING
def embedding_layer(embedding_weights: np.array) -> layers.Embedding:
    """
    Build the embedding layer shared by all the architectures.

    Parameters
    ----------
    embedding_weights : np.array
        The initial weights for the embedding layer.
    
    Returns
    -------
    layer : layers.Embedding
        The created embedding layer.
    """
    # Sizes come from the notebook constants; index 0 is treated as padding
    # and masked out (mask_zero=True).
    return layers.Embedding(
        VOCABULARY_SIZE,
        EMBEDDING_SIZE,
        weights=[embedding_weights],
        mask_zero=True,
        input_length=MAX_SEQUENCE_SIZE,
    )

# RNN (LSTM and GRU)
def _rnn_size(layer_depth: int) -> int:
    """
    Pick the number of units of a recurrent layer from its depth.

    Parameters
    ----------
    layer_depth : int
        The depth of the layer.
    
    Returns
    -------
    size : int
        128 for layers deeper than the first one, 64 otherwise.
    """
    return 128 if layer_depth > 1 else 64

def bilstm_layer(layer_depth: int) -> layers.Bidirectional:
    """
    Build a bidirectional LSTM layer that returns the full output sequence.

    Parameters
    ----------
    layer_depth : int
        The depth of the layer, used to choose the number of units.
    
    Returns
    -------
    layer : layers.Bidirectional
        The created bidirectional lstm layer.
    """
    # NOTE(review): 'relu' replaces the default LSTM activation -- confirm
    # that this is intended.
    recurrent = layers.LSTM(_rnn_size(layer_depth),
                            return_sequences=True,
                            activation='relu')
    return layers.Bidirectional(recurrent)

def bigru_layer(layer_depth: int) -> layers.Bidirectional:
    """
    Build a bidirectional GRU layer that returns the full output sequence.

    Parameters
    ----------
    layer_depth : int
        The depth of the layer, used to choose the number of units.
    
    Returns
    -------
    layer : layers.Bidirectional
        The created bidirectional gru layer.
    """
    # NOTE(review): 'relu' replaces the default GRU activation -- confirm
    # that this is intended.
    recurrent = layers.GRU(_rnn_size(layer_depth),
                           return_sequences=True,
                           activation='relu')
    return layers.Bidirectional(recurrent)

# DENSE
def _dense_size(last_layer:bool) -> int:
    """
    Pick the number of units of a dense layer.

    Parameters
    ----------
    last_layer : bool
        Indicates if the layer that must be created is the last
        one of the network.
    
    Returns
    -------
    size : int
        ``N_CLASSES`` for the last layer, 256 for any other dense layer.
    """
    return N_CLASSES if last_layer else 256

def _dense_activation(last_layer:bool) -> str:
    """
    Pick the activation function of a dense layer.

    Parameters
    ----------
    last_layer : bool
        Indicates if the layer that must be created is the last
        one of the network.
    
    Returns
    -------
    activation : str
        'softmax' for the last layer, 'relu' for any other dense layer.
    """
    return 'softmax' if last_layer else 'relu'

def dense_layer(last_layer:bool) -> layers.Dense:
    """
    Build a dense layer whose size and activation depend on its position.

    Parameters
    ----------
    last_layer : bool
        Indicates if the layer that must be created is the last
        one of the network.
    
    Returns
    -------
    layer : layers.Dense
        The created dense layer.
    """
    # Size and activation are both decided by the private helpers
    return layers.Dense(_dense_size(last_layer),
                        activation=_dense_activation(last_layer))

# MODEL SANITY CHECK


## Baseline

In [16]:
# Layer stack: embedding -> bidirectional LSTM -> softmax output layer
baseline_layers = [
    embedding_layer(embedding_weights=embedding_weights),
    bilstm_layer(layer_depth=1),
    dense_layer(last_layer=True),
]

# Build and compile the model (the summary is printed by default)
baseline_model = create_model(name='baseline',
                              layers=baseline_layers,
                              compile_info=model_compile_info)

# Forward a random batch to verify that the model actually runs
model_sanity_check(baseline_model)

Model: "baseline"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 100, 64)           64000     
                                                                 
 bidirectional_4 (Bidirectio  (None, 100, 128)         66048     
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 100, 20)           2580      
                                                                 
Total params: 132,628
Trainable params: 132,628
Non-trainable params: 0
_________________________________________________________________
Sanity check for the model with name: baseline
Input tensor shape: (128, 100)
Output tensor shape: (128, 100, 20)


## Variations
What follows is the implementation of small variations to the baseline architecture.

### GRU
Replace the LSTM layer with a GRU layer.

In [17]:
# Layer stack: embedding -> bidirectional GRU -> softmax output layer
baseline_var1_layers = [
    embedding_layer(embedding_weights=embedding_weights),
    bigru_layer(layer_depth=1),
    dense_layer(last_layer=True),
]

# Build and compile the model (the summary is printed by default)
baseline_var1_model = create_model(name='baseline_var1',
                                   layers=baseline_var1_layers,
                                   compile_info=model_compile_info)

# Forward a random batch to verify that the model actually runs
model_sanity_check(baseline_var1_model)

Model: "baseline_var1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 100, 64)           64000     
                                                                 
 bidirectional_5 (Bidirectio  (None, 100, 128)         49920     
 nal)                                                            
                                                                 
 dense_5 (Dense)             (None, 100, 20)           2580      
                                                                 
Total params: 116,500
Trainable params: 116,500
Non-trainable params: 0
_________________________________________________________________
Sanity check for the model with name: baseline_var1
Input tensor shape: (128, 100)
Output tensor shape: (128, 100, 20)


### Additional LSTM layer

In [20]:
# Create layers
baseline_var2_layers = [
                embedding_layer(embedding_weights=embedding_weights),
                bilstm_layer(layer_depth=1),
                bilstm_layer(layer_depth=2),
                dense_layer(last_layer=True)
]

# Create the model
baseline_var2_model = create_model('baseline_var2', 
                              baseline_var2_layers, 
                              model_compile_info)

# Check if the model can actually run
model_sanity_check(baseline_var2_model)

Model: "baseline_var2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_9 (Embedding)     (None, 100, 64)           64000     
                                                                 
 bidirectional_10 (Bidirecti  (None, 100, 128)         66048     
 onal)                                                           
                                                                 
 bidirectional_11 (Bidirecti  (None, 100, 256)         263168    
 onal)                                                           
                                                                 
 dense_8 (Dense)             (None, 100, 20)           5140      
                                                                 
Total params: 398,356
Trainable params: 398,356
Non-trainable params: 0
_________________________________________________________________
Sanity check for the model with name: baseline_

### Additional Dense layer

In [21]:
# Create layers
baseline_var3_layers = [
                embedding_layer(embedding_weights=embedding_weights),
                bilstm_layer(layer_depth=1),
                dense_layer(last_layer=False),
                dense_layer(last_layer=True)
]

# Create the model
baseline_var3_model = create_model('baseline_var3', 
                              baseline_var3_layers, 
                              model_compile_info)

# Check if the model can actually run
model_sanity_check(baseline_var3_model)

Model: "baseline_var3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_10 (Embedding)    (None, 100, 64)           64000     
                                                                 
 bidirectional_12 (Bidirecti  (None, 100, 128)         66048     
 onal)                                                           
                                                                 
 dense_9 (Dense)             (None, 100, 256)          33024     
                                                                 
 dense_10 (Dense)            (None, 100, 20)           5140      
                                                                 
Total params: 168,212
Trainable params: 168,212
Non-trainable params: 0
_________________________________________________________________
Sanity check for the model with name: baseline_var3
Input tensor shape: (128, 100)
Output tensor shape: (128, 100

# Training and Experiments

# Discussion and Error Analysis