# Preliminary Steps
These are some preliminary steps before addressing the task. Import some basic libraries and set a variable that will be used in multiple steps.

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# typing
from typing import List, Callable, Dict

# Dataset

## Dataset download

## Train, validation and test splits

# GloVe 

## OOV

# Models
This section is used for creating different models, going from a baseline to slightly more complicated ones.

## Constants and utilities
First of all, define some constants, parameter dictionaries and methods that will be reused by each architecture.

**Note**: the parameters defined below are shared by all the architectures, because the assignment explicitly states that the models may differ only in the layers that are added or modified.

In [2]:
# TODO: all the following constants are temporary placeholders
N_CLASSES = 20  # this must be equal to the number of tags
VOCABULARY_SIZE = 1000  # this must be obtained from the dataset
EMBEDDING_SIZE = 64  # hyper-parameter to properly set
MAX_SEQUENCE_SIZE = 100  # this must be obtained from the dataset

BATCH_SIZE = 128  # hyper-parameter to properly set
EPOCHS = 5

# Keyword arguments unpacked into model.compile().
# The sparse variants of the loss and of the accuracy metric are used because
# they expect the labels as integer class indices, NOT as one-hot vectors
# (one-hot labels would require 'categorical_crossentropy' instead).
# NOTE(review): the optimizer and metric objects are instantiated once here,
# so every model compiled with this dictionary receives the same instances -
# confirm this is intended before compiling more than one model with it.
model_compile_info = {
    'optimizer': keras.optimizers.Adam(learning_rate=1e-3),
    'loss': 'sparse_categorical_crossentropy',
    'metrics': [keras.metrics.SparseCategoricalAccuracy()],
}

# Keyword arguments meant to be unpacked into model.fit() (see train_model).
# NOTE(review): patience (10) is larger than EPOCHS (5), so with the current
# temporary values early stopping cannot trigger - revisit once EPOCHS is set.
training_info = {
    'verbose': 1,
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'callbacks': [keras.callbacks.EarlyStopping(monitor='val_loss', 
                                                patience=10,
                                                restore_best_weights=True)]
}

In [3]:
# Placeholder embedding matrix: one row per vocabulary entry and one column
# per embedding dimension. It should eventually hold the GloVe weights.
embedding_weights = np.zeros((VOCABULARY_SIZE, EMBEDDING_SIZE))

Define utility methods that will be used to create, train and test the models.

In [9]:
def create_and_compile_model(layers,
                             compile_info,
                             show_summary=True) -> keras.Model:
    """
    Stack the given layers into a sequential model and compile it.

    The last layer of the list is treated as the output layer and is
    validated before being added: it must use a softmax activation and
    have exactly ``N_CLASSES`` units.

    Parameters
    ----------
    layers : list
        Layers to add to the model, in order. Note that inside this
        function the name shadows the imported ``layers`` module.
    compile_info : dict
        Keyword arguments unpacked into ``model.compile``.
    show_summary : bool
        When true, the model summary is printed after compilation.

    Returns
    -------
    model : keras.Model
        The compiled sequential model.
    """
    model = keras.Sequential()

    for position, current_layer in enumerate(layers, start=1):
        # Only the final (output) layer goes through the sanity checks
        if position == len(layers):
            assert current_layer.activation == keras.activations.softmax, 'Wrong activation function'
            assert current_layer.units == N_CLASSES, 'Wrong number of units'

        model.add(current_layer)

    model.compile(**compile_info)

    if show_summary:
        model.summary()

    return model


def train_model(model: keras.Model,
                x_train: np.ndarray,
                y_train: np.ndarray,
                x_val: np.ndarray,
                y_val: np.ndarray,
                training_info: dict):
    """
    Fit a Keras model on the training data, validating on the validation data.
    When fitting ends, the returned History object is passed to show_history.

    :param model: Keras built model
    :param x_train: training data in np.ndarray format
    :param y_train: training labels in np.ndarray format
    :param x_val: validation data in np.ndarray format
    :param y_val: validation labels in np.ndarray format
    :param training_info: dictionary of extra keyword arguments for model.fit()

    :return
        model: the same model instance, after fitting
    """
    print("Start training! \nParameters: {}".format(training_info))

    validation_set = (x_val, y_val)
    history = model.fit(x=x_train,
                        y=y_train,
                        validation_data=validation_set,
                        **training_info)

    print("Training completed! Showing history...")

    # NOTE(review): show_history is not defined in the visible part of the
    # notebook - confirm it is defined before this function is called.
    show_history(history)

    return model


def predict_data(model: keras.Model,
                 x: np.ndarray,
                 prediction_info: dict) -> np.ndarray:
    """
    Run inference on a set of examples and return the raw model output.

    :param model: Keras built and possibly trained model
    :param x: input set of examples in np.ndarray format
    :param prediction_info: dictionary of extra keyword arguments for model.predict()

    :return
        predictions: whatever model.predict() returns for x
    """
    print('Starting prediction: \n{}'.format(prediction_info))

    # The first axis of x is reported as the number of samples
    n_samples = x.shape[0]
    print('Predicting on {} samples'.format(n_samples))

    return model.predict(x, **prediction_info)


def evaluate_predictions(predictions: np.ndarray,
                         y: np.ndarray,
                         metrics: List[Callable],
                         metric_names: List[str]):
    """
    Score model predictions against the ground truth with several metrics.

    :param predictions: model predictions in np.ndarray format
    :param y: ground-truth labels in np.ndarray format
    :param metrics: metric callables, each invoked with the keyword
        arguments y_pred and y_true
    :param metric_names: names under which the metric values are stored,
        paired positionally with metrics (must have the same length)

    :return
        metric_info: dictionary mapping each metric name to the value
        returned by the corresponding metric
    """
    assert len(metrics) == len(metric_names)

    print("Evaluating predictions! Total samples: ", y.shape[0])

    # Metrics and names are matched by position
    return {
        name: compute(y_pred=predictions, y_true=y)
        for compute, name in zip(metrics, metric_names)
    }


## Baseline

In [10]:
def baseline_model(compile_info, embedding_weights=None) -> keras.Model:
    """
    Build and compile the baseline model: an Embedding layer, one LSTM layer
    that returns the whole sequence, and a Dense softmax layer producing one
    class distribution per time step.

    Parameters
    ----------
    compile_info: Dictionary
        Contains information required for compiling the model.
    embedding_weights: np.array, optional
        Initial weights of the embedding layer, expected to have shape
        (VOCABULARY_SIZE, EMBEDDING_SIZE). When None, the embedding layer
        keeps its default initialization.

    Returns
    -------
    model : keras.Model
        The compiled keras model.
    """
    embedding_kwargs = {
        'input_dim': VOCABULARY_SIZE,
        'output_dim': EMBEDDING_SIZE,
        'input_length': MAX_SEQUENCE_SIZE,
        'mask_zero': True,
    }
    # Only pass initial weights when they are provided: forwarding
    # weights=[None] would break the embedding layer.
    if embedding_weights is not None:
        embedding_kwargs['weights'] = [embedding_weights]

    model_layers = [
        # Embedding layer
        layers.Embedding(**embedding_kwargs),
        # LSTM layer
        # TODO: try different activation functions??
        layers.LSTM(128, return_sequences=True, activation='relu'),
        # Dense layer
        layers.Dense(N_CLASSES, activation='softmax'),
    ]

    # Compile with the dictionary received as argument, not the global one
    return create_and_compile_model(model_layers, compile_info)

In [16]:
model_1 = baseline_model(model_compile_info, embedding_weights)

# Model sanity check for seeing if it runs correctly.
# The embedding layer works on integer token indices, so feed random ids.
# Floats in [0, 1) would be cast to integers and all collapse to index 0,
# which is the value masked by mask_zero=True.
input_tensor = np.random.randint(low=1, high=VOCABULARY_SIZE,
                                 size=(BATCH_SIZE, MAX_SEQUENCE_SIZE))
print(f'Input tensor shape: {input_tensor.shape}')
output_tensor = model_1(input_tensor)
print(f'Output tensor shape: {output_tensor.shape}')

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 100, 64)           64000     
                                                                 
 lstm_4 (LSTM)               (None, 100, 128)          98816     
                                                                 
 dense_4 (Dense)             (None, 100, 20)           2580      
                                                                 
Total params: 165,396
Trainable params: 165,396
Non-trainable params: 0
_________________________________________________________________
Input tensor shape: (128, 100)
Output tensor shape: (128, 100, 20)


## Variations

### GRU

### Additional LSTM layer

### Additional Dense layer

# Training and Experiments

# Discussion and Error Analysis