# Preliminary Steps
These are some preliminary steps before addressing the task. Import some basic libraries and set a variable that will be used in multiple steps.

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Dataset

## Dataset download

## Train, validation and test splits

# GloVe 

## OOV

# Models
This section is used for creating different models, going from a baseline to slightly more complicated ones.

## Baseline

Reference on applying `Dense` directly to a sequence versus wrapping it in `TimeDistributed`: https://stackoverflow.com/questions/61361866/dense-vs-timedistributeddense


In [72]:
# TODO: all the following constants are temporary 
N_CLASSES = 20  # this must be equal to the number of tags
VOCABULARY_SIZE = 1000  # this must be obtained from the dataset
EMBEDDING_SIZE = 64  # hyper-parameter to properly set
MAX_SEQUENCE_SIZE = 100  # this must be obtained from the dataset
# Short alias of MAX_SEQUENCE_SIZE, so that code written against either name
# resolves on a fresh kernel instead of raising NameError
SEQUENCE_SIZE = MAX_SEQUENCE_SIZE
BATCH_SIZE = 128  # hyper-parameter to properly set

In [73]:
def create_model(layers, compile_info=None) -> keras.Model:
    """
    Build a sequential keras model from a list of layers.
    After the creation, the model is compiled and its summary is printed 
    to console.

    Parameters
    ----------
    layers : array
        Array that contains a list of layers that must be added 
        to the model, in order. The last layer must expose a softmax 
        ``activation`` and ``units`` equal to ``N_CLASSES``.
        Note that, inside this function, this name shadows the 
        ``tensorflow.keras.layers`` module.
    compile_info : dict, optional
        Keyword arguments forwarded to ``model.compile``. When omitted, 
        the module-level ``compile_info`` variable is used, so existing 
        one-argument calls keep working.

    Returns
    -------
    model : keras.Model
        The compiled keras model

    Raises
    ------
    AssertionError
        If ``layers`` is empty, or the last layer does not have a softmax 
        activation and ``N_CLASSES`` units.
    NameError
        If ``compile_info`` is not passed and no module-level 
        ``compile_info`` exists.
    """
    assert len(layers) > 0, 'No layers provided'

    # Sanity checks for being sure that the last layer has been 
    # correctly set. Done before building anything so that a wrong 
    # configuration fails fast.
    last_layer = layers[-1]
    assert last_layer.activation == keras.activations.softmax, 'Wrong activation function'
    assert last_layer.units == N_CLASSES, 'Wrong number of units'

    if compile_info is None:
        # Backward compatibility: fall back to the notebook-level variable.
        # The parameter shadows the global name, hence the explicit lookup.
        compile_info = globals().get('compile_info')
        if compile_info is None:
            raise NameError(
                "compile_info was not passed to create_model and no "
                "module-level 'compile_info' is defined"
            )

    model = keras.Sequential()
    for layer in layers:
        model.add(layer)

    # Compile
    model.compile(**compile_info)

    # Print model summary
    model.summary()
    
    return model

In [74]:
# Placeholder embedding matrix (one row per vocabulary entry).
# It should eventually be replaced by the weights obtained by GloVe.
embedding_weights = tf.zeros(shape=(VOCABULARY_SIZE, EMBEDDING_SIZE), dtype=tf.float32)

def baseline_model(compile_info, embedding_weights = None) -> keras.Model:
    """
    Create the baseline model: Embedding -> LSTM -> Dense (softmax).

    Parameters
    ----------
    compile_info : dict
        Keyword arguments used to compile the model 
        (forwarded to ``model.compile``)
    embedding_weights : tensor or array, optional
        Initial weights of the embedding layer, expected with shape 
        ``(VOCABULARY_SIZE, EMBEDDING_SIZE)``. When omitted, the embedding 
        layer keeps its default initialization.

    Returns
    -------
    model : keras.Model
        The compiled keras model
    """
    # NOTE(review): with mask_zero=True index 0 is reserved for padding; the 
    # Keras documentation says input_dim should then be vocabulary size + 1. 
    # Confirm once VOCABULARY_SIZE is derived from the dataset.
    embedding_kwargs = {
        'input_dim': VOCABULARY_SIZE,
        'output_dim': EMBEDDING_SIZE,
        'input_length': MAX_SEQUENCE_SIZE,
        'mask_zero': True,
    }
    # Only pass initial weights when they are actually provided: 
    # weights=[None] is not a valid value for the layer.
    if embedding_weights is not None:
        embedding_kwargs['weights'] = [embedding_weights]

    model_layers = [
        layers.Embedding(**embedding_kwargs),

        # TODO: try different activation functions??
        # LSTM layer, returning one output per time step
        layers.LSTM(128, return_sequences=True, activation='relu'),

        # Dense layer, applied on the last axis, hence once per time step
        layers.Dense(N_CLASSES, activation='softmax'),
    ]

    # Create the model
    model = create_model(model_layers)

    # create_model, called with the layers only, compiles with the 
    # notebook-level settings; compile again so that the settings passed 
    # to this function are the ones in effect.
    model.compile(**compile_info)

    return model

In [76]:
# sparse_categorical_crossentropy expects the labels as integer class ids.
# NOTE: if the labels end up being one hot encoded, use 
# categorical_crossentropy (and CategoricalAccuracy) instead.
compile_info = {
    'optimizer': keras.optimizers.Adam(learning_rate=1e-3),
    'loss': 'sparse_categorical_crossentropy',
    'metrics': [keras.metrics.SparseCategoricalAccuracy()],
}

model_1 = baseline_model(compile_info, embedding_weights)


# Create a random input tensor for testing the model.
# It is a stand-in for a real batch of token ids: integer indices in 
# [1, VOCABULARY_SIZE). Float values in [0, 1) would all be truncated to 
# index 0, which the embedding layer masks out as padding.
input_tensor = tf.random.uniform(
    (BATCH_SIZE, MAX_SEQUENCE_SIZE),
    minval=1,
    maxval=VOCABULARY_SIZE,
    dtype=tf.int32,
)
print(f'Input tensor shape: {input_tensor.shape}')
output_tensor = model_1(input_tensor)
print(f'Output tensor shape: {output_tensor.shape}')


Model: "sequential_42"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_42 (Embedding)    (None, 100, 64)           64000     
                                                                 
 lstm_41 (LSTM)              (None, 100, 128)          98816     
                                                                 
 dense_50 (Dense)            (None, 100, 20)           2580      
                                                                 
Total params: 165,396
Trainable params: 165,396
Non-trainable params: 0
_________________________________________________________________
Input tensor shape: (128, 100)
Output tensor shape: (128, 100, 20)


## Baseline variations

### GRU

### Additional LSTM layer

### Additional Dense layer

# Training and Experiments

# Discussion and Error Analysis