# Preliminary Steps
These are some preliminary steps before addressing the task. Import some basic libraries and set a variable that will be used in multiple steps.

In [None]:
import os
import zipfile
import urllib.request
import re
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# typing
from typing import List, Callable, Dict

# Dataset

In [None]:
# Name of the corpus; also the name of the folder created when the archive is extracted
dataset_name = "dependency_treebank"
# Directory (relative to the current working directory) that receives the raw download
dataset_folder = os.path.join(os.getcwd(), "Datasets", "Original")

# Create the download directory; exist_ok makes this a no-op when it is already there
# and avoids the check-then-create race of a separate os.path.exists() test
os.makedirs(dataset_folder, exist_ok=True)

## Dataset download

In [None]:
# Remote location of the zipped corpus
url = 'https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dependency_treebank.zip'

# Local file the archive is saved to
dataset_path = os.path.join(dataset_folder, "dependency_treebank.zip")

# Fetch the archive only when no file exists at the destination yet
if not os.path.exists(dataset_path):
    urllib.request.urlretrieve(url, filename=dataset_path)
    print("Successful download")


## Dataset Extraction

In [None]:
# Unpack every member of the downloaded archive next to it
with zipfile.ZipFile(dataset_path, mode='r') as zip_ref:
    zip_ref.extractall(path=dataset_folder)

print("Successful extraction")

## Cleaning of text and dataframe creation

In [None]:
# Matches every character that is NOT an allowed token symbol. The allowed symbols
# are digits, lowercase letters, space, '#', '+' and '_'; whatever this pattern
# matches is removed by filter_out_uncommon_symbols.
# NOTE(review): tokens made only of other characters (e.g. ',' or '.') become
# empty strings after filtering — confirm this is intended.
GOOD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')
def prepreocess(token : str) -> str:
    """
    Run a raw token through the whole cleaning pipeline: lower-casing,
    removal of the symbols that are not allowed, and trimming of the
    surrounding whitespace (in this order).
    Input: the string to process
    Output: the processed string
    """
    return strip_text(filter_out_uncommon_symbols(text_to_lower(token)))

def text_to_lower(text: str) -> str:
    """
    Convert every character of the string to lower case.
    Input: the string to process
    Output: the lower-cased string
    """
    lowered = text.lower()
    return lowered

def filter_out_uncommon_symbols(text: str) -> str:
    """
    Delete from the text every character matched by GOOD_SYMBOLS_RE,
    i.e. every character that is outside the set of allowed symbols.
    """
    cleaned = re.sub(GOOD_SYMBOLS_RE, '', text)
    return cleaned

def strip_text(text: str) -> str:
    """
    Trim leading and trailing whitespace (newlines and carriage returns
    included) from the text.
    Example:
    Input: '  This assignment is cool\n'
    Output: 'This assignment is cool'
    """
    without_leading = text.lstrip()
    return without_leading.rstrip()

def _split_for_file(file_number: int) -> str:
    """Map the number found in a corpus file name to its data split."""
    if file_number < 101:
        return "train"
    if file_number < 151:
        return "validation"
    return "test"


dataframe_rows = []
folder = os.path.join(os.getcwd(), "Datasets", "Original", dataset_name)
# Iterate over the corpus files; sorting makes the row order reproducible,
# since os.listdir returns entries in arbitrary order
for filename in sorted(os.listdir(folder)):
    file_path = os.path.join(folder, filename)
    # Ignore hidden files (e.g. ".DS_Store") and anything that is not a regular file
    if filename.startswith(".") or not os.path.isfile(file_path):
        continue
    try:
        # The number between "_" and "." in the file name selects the split
        number_of_file = int(filename.split("_")[1].split(".")[0])
        split = _split_for_file(number_of_file)
        with open(file_path, mode='r', encoding='utf-8') as text_file:
            for line in text_file:
                columns = line.split()
                # Blank lines only separate sentences: skip them instead of
                # stopping, otherwise every sentence after the first one in
                # the file would be dropped
                if not columns:
                    continue
                dataframe_rows.append({
                    "split": split,
                    "token": prepreocess(columns[0]),
                    "label": columns[1],
                })
    except Exception as e:
        # Report which file failed, then abort by propagating the error
        print('Failed to process %s. Reason: %s' % (file_path, e))
        raise

# Create processed dataframe folder
folder = os.path.join(os.getcwd(), "Datasets", "Dataframes", dataset_name)
os.makedirs(folder, exist_ok=True)

# Transform the list of rows into a dataframe; passing the columns explicitly
# fixes their order and keeps the frame well-formed even when no row was read
dataframe = pd.DataFrame(dataframe_rows, columns=["split", "token", "label"])
dataframe_path = os.path.join(folder, dataset_name + ".pkl")
dataframe.to_pickle(dataframe_path)

# Reload the dataframe from the pickle that was just written by this cell
df = pd.read_pickle(dataframe_path)

# GloVe
This section is the one responsible for the implementation of the GloVe embedding system.

## Constants and utilities


In [None]:
#Requested variables from the pre-processing methods:
VOCAB_SIZE = 1000 #Temporary, it should be defined above
words_to_tokens = {} #Dictionary that associates each word from the training+validation+test set with a token
training_words = [] #List of unique words from the training set (or a method to get it)
training_word = training_words #Alias of the original (misspelled) name, kept for backward compatibility
validation_words = [] #List of unique words from the validation set (or a method to get it)
test_words = [] #List of unique words from the test set (or a method to get it)
training_sentences = [] #List of sentences/documents from the training set (or a method to get it)
validation_sentences = [] #List of sentences/documents from the validation set (or a method to get it)
test_sentences = [] #List of sentences/documents from the test set (or a method to get it)
#NB: a sentence or a document should be a List of words
#---------------------------------------------------

URL_BASE = "https://nlp.stanford.edu/data" #Location of the pre-trained GloVe's files
GLOVE_VERSION = "6B"

EMBEDDING_SIZE = 50 #The dimensionality of the embeddings; to be tested

#Paths used to download and extract GloVe's files
PATHS = {
    "url": URL_BASE + "/glove." + GLOVE_VERSION + ".zip", #Remote zip archive
    "glove_path": os.path.join(os.getcwd(), "Glove", GLOVE_VERSION), #Local folder for the GloVe files
    "glove_zip": os.path.join(os.getcwd(), "Glove", GLOVE_VERSION, "glove." + GLOVE_VERSION + ".zip"), #Local copy of the archive
    "glove_file": os.path.join(os.getcwd(), "Glove", GLOVE_VERSION, "glove." + GLOVE_VERSION + "." + str(EMBEDDING_SIZE) + "d.txt"), #Extracted embeddings file for the chosen size
}

OOV_METHOD = "Mean" #Determine which OOV method to adopt; choose one between "Mean", "Random" and "Placeholder"

## Download
This part checks whether the GloVe file is already present; if it is not, the archive is downloaded and extracted.

In [None]:
def setup_files():

    '''
    Make sure the GloVe embeddings file is available locally.

    The GloVe folder is created when missing. If the embeddings txt file
    is not in the folder, the zip archive is downloaded (unless it is
    already there) and then extracted.
    '''

    glove_dir = PATHS["glove_path"]
    if not os.path.exists(glove_dir):
        os.makedirs(glove_dir)

    #Nothing else to do when the embeddings file has already been extracted
    if os.path.exists(PATHS["glove_file"]):
        return

    archive = PATHS["glove_zip"]
    if not os.path.exists(archive):
        download_glove(PATHS["url"])

    extract_glove(archive, glove_dir)

def download_glove(url: str):

    '''
    Fetch the GloVe zip archive from the given url and save it
    at the location stored in PATHS['glove_zip'].
    '''

    destination = PATHS['glove_zip']
    urllib.request.urlretrieve(url, destination)
    print("Successful download")

def extract_glove(zip_file: str,
                  glove_path: str):

    '''
    Extract GloVe's zip file.

    Parameters:
    ----------
    zip_file : str
        Path of the zip archive to extract.
    glove_path : str
        Folder that receives the extracted files.
    '''

    #Use the arguments rather than the global paths, so the function
    #does what its signature promises
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(path=glove_path)
        print("Successful extraction")

## Initialization
In this step, the downloaded GloVe file is loaded into an embedding vocabulary

In [None]:
def load_model(glove_file: str) ->Dict:

    '''
    Open GloVe's txt file and store each of its contained words
    into a dictionary along with their correspondent embedding weights.

    Each non-empty line is split on whitespace: the first field is the
    word, the remaining fields are its embedding values.

    Parameters:
    ----------
    glove_file : str
        GloVe's txt file path.

    Returns:
    -------
    vocabulary: Dict
        GloVe's vocabulary: word -> numpy array of floats.

    '''

    print("Loading GloVe Model...")

    vocabulary = {}
    #Stream the file line by line instead of loading it entirely in memory
    with open(glove_file, encoding="utf8") as f:
        for line in f:
            splits = line.split()
            if not splits: #Skip blank lines (e.g. a trailing empty line)
                continue
            #The word is the dictionary's key, its embedding is the value
            vocabulary[splits[0]] = np.array(splits[1:], dtype=float)

    print("GloVe model loaded")

    return vocabulary

Loading GloVe Model...
GloVe model loaded


## OOV
In this section, some possible "Out Of Vocabulary" handling methods are implemented, along with other OOV-related functions.

In [None]:
#OOV-handling: possible methods
def random_embedding(embedding_size: int) -> np.ndarray:
    '''
    Build an embedding whose values are drawn from a uniform
    distribution over [-0.05, 0.05).

    Parameters:
    ----------
    embedding_size: int
        Number of values in the returned array.

    Returns:
    -------
    np.ndarray
        The randomly filled array.
    '''

    lower_bound, upper_bound = -0.05, 0.05
    return np.random.uniform(lower_bound, upper_bound, embedding_size)

def placeholder_embedding(embedding_size: int) -> np.ndarray:
    '''
    Build an embedding made only of zeros.

    Parameters:
    ----------
    embedding_size: int
        Number of values in the returned array.

    Returns:
    -------
    np.ndarray
        The zero-filled array.
    '''

    zeros = np.zeros(embedding_size)
    return zeros

def neighbours_mean_embedding(word: str,
                              glove_embedding: Dict[str, np.ndarray],
                              sentences: List[List[str]]) -> np.ndarray:

    '''
    Compute the embedding of an OOV word by taking the mean
    of the embeddings of its neighbours.

    Every occurrence of the word in every sentence is inspected; the words
    immediately to its left and right are collected when they have an
    embedding. Each distinct neighbour counts once in the mean.

    Parameters:
    ---------
    word: str
        The OOV that needs to be embedded.
    glove_embedding: Dict[str, np.ndarray]
        GloVe's embedding (word -> vector).
    sentences: List[List[str]]
        A list of all the sentences (lists of words) in the current set.

    Returns:
    -------
    mean: np.ndarray
        The element-wise mean of the neighbours' embedding vectors. When the
        word has no embedded neighbour, a zero vector with the same length as
        the vectors in glove_embedding is returned.
    '''

    neighbours = set()

    for sentence in sentences:
        last_index = len(sentence) - 1
        for index, current in enumerate(sentence):
            if current != word:
                continue
            if index > 0:
                left_neighbour = sentence[index - 1]
                if left_neighbour in glove_embedding: #Consider only words that are not OOV
                    neighbours.add(left_neighbour)
            if index < last_index:
                right_neighbour = sentence[index + 1]
                if right_neighbour in glove_embedding: #Consider only words that are not OOV
                    neighbours.add(right_neighbour)

    if not neighbours:
        #No usable context: fall back to zeros instead of the NaN that the
        #mean of an empty array would produce
        sample = next(iter(glove_embedding.values()), None)
        return np.zeros(0 if sample is None else len(sample))

    #Sorting fixes the summation order, so the result is reproducible
    neighbours_embeddings = np.array([glove_embedding[neighbour] for neighbour in sorted(neighbours)])
    #axis=0 averages across neighbours and keeps one value per embedding dimension
    return np.mean(neighbours_embeddings, axis=0)


#Others
def get_oov_list(words: List[str],
                 glove_embedding: Dict[str, int]) ->List[str]:

    '''
    Collect the words that have no entry in the GloVe embedding.

    Parameters:
    ----------
    words: List[str]
        A list of unique words from a set of documents.
    glove_embedding: Dict[str, int]
        GloVe's embedding.

    Returns:
    -------
    oov: List[str]
        The OOV terms, without duplicates and in no particular order.
    '''

    missing = {candidate for candidate in words if candidate not in glove_embedding}
    return list(missing)

## Embedding matrix
Now that an OOV method has been chosen, it is possible to build the embedding matrix, which associates each word of the vocabulary with its corresponding embedding.

In [None]:
def update_embeddings(glove_embedding: Dict[str, np.ndarray],
                      new_embeddings: Dict[str, np.ndarray]):

    '''
    Update the GloVe's embeddings by adding the new embeddings of
    the previous OOV words.

    The dictionary is modified in place, so the caller sees the new
    entries; nothing is returned.

    Parameters:
    ----------
    glove_embedding: Dict[str, np.ndarray]
        GloVe's embedding; receives the new entries.
    new_embeddings: Dict[str, np.ndarray]
        A dictionary containing the new embeddings
        for the analyzed OOV words.
    '''

    #Merge in place: rebinding the local name (glove_embedding = a | b)
    #would leave the caller's dictionary untouched
    glove_embedding.update(new_embeddings)

def update_embedding_matrix(starting_embedding_matrix: np.ndarray,
                            glove_embedding: Dict[str, np.ndarray],
                            embedding_size: int,
                            words: List[str],
                            words_to_tokens: Dict[str, int],
                            oov_method: str,
                            sentences: List[List[str]]) -> np.ndarray:

    '''
    Build an embedding matrix updating the values of a previous matrix based on
    a new set of sentences and an updated GloVe embedding.

    Only the rows that are still entirely zero are filled; rows that already
    hold an embedding are left as they are.

    Parameters:
    ----------
    starting_embedding_matrix: np.ndarray
        The starting embedding matrix. It is copied, not modified.
    glove_embedding: Dict[str, np.ndarray]
        GloVe's embedding, eventually updated in the previous step with the
        previous OOV terms. The embeddings computed here for OOV words are
        handed to update_embeddings at the end of the call.
    embedding_size: int
        The dimensions of the embedding.
    words: List[str]
        A list of unique words in the set of sentences.
    words_to_tokens: Dict[str, int]
        A dictionary that associates each word with a token (the row index).
    oov_method: str
        The name of the OOV method to use to handle OOV cases:
        "Random", "Placeholder" or "Mean".
    sentences: List[List[str]]
        A list of all the sentences (lists of words) in the current set.

    Returns:
    -------
    embedding_matrix: np.ndarray
        The updated embedded matrix that associates each word of the vocabulary
        with its corresponding embedding.

    Raises:
    ------
    ValueError
        If an OOV word is met and oov_method is not one of the supported names.
    '''

    #A set keeps the per-word OOV test O(1)
    oov_terms = set(get_oov_list(words, glove_embedding))
    #A copy of the original matrix is returned
    embedding_matrix = np.copy(starting_embedding_matrix)
    discovered_embeddings = {}

    for word in words:

        token = words_to_tokens[word]
        #A row with any non-zero value has already been filled
        if not np.all(embedding_matrix[token] == 0):
            continue

        if word in oov_terms: #Handle the OOV case with one of the methods
            if oov_method == "Random":
                embedding_vector = random_embedding(embedding_size)
            elif oov_method == "Placeholder":
                embedding_vector = placeholder_embedding(embedding_size)
            elif oov_method == "Mean":
                embedding_vector = neighbours_mean_embedding(word, glove_embedding, sentences)
            else:
                raise ValueError("Invalid OOV method: %r" % (oov_method,))

            discovered_embeddings[word] = embedding_vector

        else:
            embedding_vector = glove_embedding[word]

        embedding_matrix[token] = embedding_vector #Update the embedding matrix

    #The computed values for the OOV words update the GloVe embeddings at the end of the process.
    #Updating these values at runtime affects the "Mean" OOV method.
    update_embeddings(glove_embedding, discovered_embeddings)

    return embedding_matrix

## Train, validation and test vocabularies
Here all the previous methods defined in the above sections are exploited to create three different vocabularies.

In [None]:
setup_files() #Create a path, download and extract the files, if necessary
#Load the GloVe model under the name the calls below actually use
glove_embedding = load_model(PATHS["glove_file"])
glove_vocab = glove_embedding #Alias of the name this cell originally assigned

embedding_matrix_0 = np.zeros((VOCAB_SIZE, EMBEDDING_SIZE), dtype=np.float32) #Create an empty embedding matrix

#Build the embedding matrix with the training set data
embedding_matrix_training = update_embedding_matrix(embedding_matrix_0,
                                                    glove_embedding,
                                                    EMBEDDING_SIZE,
                                                    training_words,
                                                    words_to_tokens,
                                                    OOV_METHOD,
                                                    training_sentences)

#Get an updated version of the embedding matrix with the validation set data
embedding_matrix_validation = update_embedding_matrix(embedding_matrix_training,
                                                      glove_embedding,
                                                      EMBEDDING_SIZE,
                                                      validation_words,
                                                      words_to_tokens,
                                                      OOV_METHOD,
                                                      validation_sentences)

#Get an updated version of the embedding matrix with the test set data
embedding_matrix_test = update_embedding_matrix(embedding_matrix_validation,
                                                glove_embedding,
                                                EMBEDDING_SIZE,
                                                test_words,
                                                words_to_tokens,
                                                OOV_METHOD,
                                                test_sentences)

# Models
This section is used for creating different models, going from a baseline to slightly more complicated ones.

## Constants and utilities
First of all, define some constants, parameter dictionaries and methods that will be reused by each architecture.

In [None]:
# TODO: all the following constants are temporary
# They size the layers and the random input built by the helpers below.
N_CLASSES = 20  # this must be equal to the number of tags
VOCABULARY_SIZE = 1000  # this must be obtained from the dataset
# NOTE(review): this rebinds EMBEDDING_SIZE, which the GloVe section sets to 50 and
# uses to pick the embeddings file — confirm which value both sections should share.
EMBEDDING_SIZE = 64  # hyper-parameter to properly set [Defined in GloVe/Constants and Utilities]
MAX_SEQUENCE_SIZE = 100  # this must be obtained from the dataset

BATCH_SIZE = 128  # hyper-parameter to properly set
EPOCHS = 5  # number of training epochs passed to fit() through training_info


# Compile settings shared by all the models.
# sparse_categorical_crossentropy expects integer class ids as labels
# (not one-hot vectors), and so does SparseCategoricalAccuracy.
# NOTE(review): the same optimizer and metric objects are handed to every model
# compiled with this dictionary — confirm that sharing them is intended.
model_compile_info = dict(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

# Fit settings shared by all the models.
# NOTE(review): patience is 10; as long as EPOCHS is smaller than that,
# early stopping cannot trigger.
training_info = dict(
    verbose=1,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_loss',
                                      patience=10,
                                      restore_best_weights=True),
    ],
)

In [None]:
# Placeholder for the embedding weights: an all-zero matrix with one row per
# vocabulary entry. It should eventually hold the weights obtained by GloVe.
embedding_weights = np.zeros((VOCABULARY_SIZE, EMBEDDING_SIZE))

Define utility methods that will be used to **create**, **train** and **test** the models.

In [None]:
def create_model(name,
                 layers,
                 compile_info,
                 show_summary=True) -> keras.Model:
    """
    Assemble a sequential model from the given layers, compile it and
    optionally print its summary.

    Parameters
    ----------
    name : str
        Name given to the sequential model.
    layers : array
        The layers to add to the model, in order. The last one must use
        the softmax activation and have N_CLASSES units.
    compile_info: Dictionary
        Keyword arguments forwarded to the model's compile method.
    show_summary: bool
        If true, then the summary of the model will be printed to console


    Returns
    -------
    model : keras.Model
        The compiled keras model.
    """
    model = keras.Sequential(name=name)
    last_position = len(layers) - 1

    for position, current_layer in enumerate(layers):
        if position == last_position:
            # Sanity checks on the output layer, run just before adding it
            assert current_layer.activation == keras.activations.softmax, 'Wrong activation function'
            assert current_layer.units == N_CLASSES, 'Wrong number of units'

        model.add(current_layer)

    model.compile(**compile_info)

    if show_summary:
        model.summary()

    return model


def train_model(model: keras.Model,
                x_train: np.ndarray,
                y_train: np.ndarray,
                x_val: np.ndarray,
                y_val: np.ndarray,
                training_info: dict):
    """
    Fit the Keras model on the training data, validating on the
    validation data, then hand the resulting History object to
    show_history (defined outside this section).

    :param model: Keras built model
    :param x_train: training data in np.ndarray format
    :param y_train: training labels in np.ndarray format
    :param x_val: validation data in np.ndarray format
    :param y_val: validation labels in np.ndarray format
    :param training_info: dictionary storing model fit() argument information

    :return
        model: trained Keras model
    """
    print("Start training! \nParameters: {}".format(training_info))
    validation_pair = (x_val, y_val)
    fit_history = model.fit(x=x_train,
                            y=y_train,
                            validation_data=validation_pair,
                            **training_info)
    print("Training completed! Showing history...")

    show_history(fit_history)

    return model


def predict_data(model: keras.Model,
                 x: np.ndarray,
                 prediction_info: dict) -> np.ndarray:
    """
    Run the model on a set of examples and return what it predicts.

    :param model: Keras built and possibly trained model
    :param x: input set of examples in np.ndarray format
    :param prediction_info: dictionary storing model predict() argument information

    :return
        predictions: the output of model.predict for the given examples
    """

    print('Starting prediction: \n{}'.format(prediction_info))
    sample_count = x.shape[0]
    print('Predicting on {} samples'.format(sample_count))

    return model.predict(x, **prediction_info)


def evaluate_predictions(predictions: np.ndarray,
                         y: np.ndarray,
                         metrics: List[Callable],
                         metric_names: List[str]):
    """
    Score the predictions with each of the given metric functions.

    Every metric is called with the keyword arguments y_pred and y_true.

    :param predictions: model predictions in np.ndarray format
    :param y: ground-truth labels in np.ndarray format
    :param metrics: list of metric functions
    :param metric_names: list of metric names, one per metric function

    :return
        metric_info: dictionary mapping each metric name to its value
    """

    assert len(metrics) == len(metric_names)

    print("Evaluating predictions! Total samples: ", y.shape[0])

    return {
        label: scorer(y_pred=predictions, y_true=y)
        for scorer, label in zip(metrics, metric_names)
    }

def model_sanity_check(model: keras.Model):
    """
    Create a random batch of token ids and try to pass it through the model.
    This method should be used in order to check if the model is
    working as expected. The shapes of the input and of the output
    are printed to console.

    Parameters
    ----------
    model : keras.Model
        The model that must be tried.

    """
    print(f'Sanity check for the model with name: {model.name}')
    # The models start with an embedding layer, which looks up integer ids.
    # Uniform floats in [0, 1) would all truncate to id 0, and that id is
    # masked by the embedding layer of this file (mask_zero=True), so the
    # check would exercise nothing. Draw valid, non-zero ids instead.
    input_tensor = np.random.randint(low=1,
                                     high=VOCABULARY_SIZE,
                                     size=(BATCH_SIZE, MAX_SEQUENCE_SIZE))
    print(f'Input tensor shape: {input_tensor.shape}')
    output_tensor = model(input_tensor)
    print(f'Output tensor shape: {output_tensor.shape}')

Define utility methods for **creating layers** in order to: 
* reduce the code verbosity.
* be sure to always create different architectures with the same layer structures.

In [None]:
# EMBEDDING
def embedding_layer(embedding_weights: np.ndarray) -> layers.Embedding:
    """
    Build the embedding layer shared by all the architectures.

    The layer is sized with VOCABULARY_SIZE, EMBEDDING_SIZE and
    MAX_SEQUENCE_SIZE, starts from the given weights and masks the id 0.

    Parameters
    ----------
    embedding_weights : np.ndarray
        The weights for the embedding layer.

    Returns
    -------
    layer : layers.Embedding
        The created embedding layer.
    """
    return layers.Embedding(
        input_dim=VOCABULARY_SIZE,
        output_dim=EMBEDDING_SIZE,
        input_length=MAX_SEQUENCE_SIZE,
        weights=[embedding_weights],
        mask_zero=True,
    )

# RNN (LSTM and GRU)
def _rnn_size(layer_depth: int) -> int:
    """
    Simple logic used for assigning the number of units 
    to the rnn layer.

    Parameters
    ----------
    layer_depth : int
        The depth of the layer.
    
    Returns
    -------
    size : int
        The number units.
    """
    size = 64
    if layer_depth > 1:
        size = 128
    return size

def bilstm_layer(layer_depth: int) -> layers.Bidirectional:
    """
    Build a bidirectional LSTM layer that returns the whole sequence.

    Parameters
    ----------
    layer_depth : int
        The depth of the layer; it determines the number of units.

    Returns
    -------
    layer : layers.Bidirectional
        The created bidirectional lstm layer.
    """
    recurrent = layers.LSTM(_rnn_size(layer_depth),
                            return_sequences=True,
                            activation='relu')
    return layers.Bidirectional(recurrent)

def bigru_layer(layer_depth: int) -> layers.Bidirectional:
    """
    Build a bidirectional GRU layer that returns the whole sequence.

    Parameters
    ----------
    layer_depth : int
        The depth of the layer; it determines the number of units.

    Returns
    -------
    layer : layers.Bidirectional
        The created bidirectional gru layer.
    """
    recurrent = layers.GRU(_rnn_size(layer_depth),
                           return_sequences=True,
                           activation='relu')
    return layers.Bidirectional(recurrent)

# DENSE
def _dense_size(last_layer:bool) -> int:
    """
    Choose the number of units of a dense layer.

    Parameters
    ----------
    last_layer : bool
        Indicates if the layer that must be created is the last
        one of the network.

    Returns
    -------
    size : int
        N_CLASSES for the last layer, 256 for any other dense layer.
    """
    return N_CLASSES if last_layer else 256

def _dense_activation(last_layer:bool) -> str:
    """
    Simple logic for assigning the activation function of the dense layer.

    Parameters
    ----------
    last_layer : bool
        Indicates if the layer that must be created is the last
        one of the network.
    
    Returns
    -------
    activation : str
        The activation function of the layer.
    """
    activation = 'relu'
    if last_layer:
        activation = 'softmax'
    return activation

def dense_layer(last_layer:bool) -> layers.Dense:
    """
    Build a dense layer whose size and activation depend on whether
    it closes the network.

    Parameters
    ----------
    last_layer : bool
        Indicates if the layer that must be created is the last
        one of the network.

    Returns
    -------
    layer : layers.Dense
        The created dense layer.
    """
    return layers.Dense(_dense_size(last_layer),
                        activation=_dense_activation(last_layer))

# MODEL SANITY CHECK


## Baseline

In [None]:
# Create layers
baseline_layers = [
                embedding_layer(embedding_weights=embedding_weights),
                bilstm_layer(layer_depth=1),
                dense_layer(last_layer=True)
]

# Create the model
baseline_model = create_model('baseline', 
                              baseline_layers, 
                              model_compile_info)

# Check if the model can actually run
model_sanity_check(baseline_model)

Model: "baseline"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 100, 64)           64000     
                                                                 
 bidirectional_4 (Bidirectio  (None, 100, 128)         66048     
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 100, 20)           2580      
                                                                 
Total params: 132,628
Trainable params: 132,628
Non-trainable params: 0
_________________________________________________________________
Sanity check for the model with name: baseline
Input tensor shape: (128, 100)
Output tensor shape: (128, 100, 20)


## Variations
What follows is the implementation of small variations to the baseline architecture.

### GRU
Replace the LSTM layer with a GRU layer.

In [None]:
# Create layers
baseline_var1_layers = [
                embedding_layer(embedding_weights=embedding_weights),
                bigru_layer(layer_depth=1),
                dense_layer(last_layer=True)
]

# Create the model
baseline_var1_model = create_model('baseline_var1', 
                              baseline_var1_layers, 
                              model_compile_info)

# Check if the model can actually run
model_sanity_check(baseline_var1_model)

Model: "baseline_var1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 100, 64)           64000     
                                                                 
 bidirectional_5 (Bidirectio  (None, 100, 128)         49920     
 nal)                                                            
                                                                 
 dense_5 (Dense)             (None, 100, 20)           2580      
                                                                 
Total params: 116,500
Trainable params: 116,500
Non-trainable params: 0
_________________________________________________________________
Sanity check for the model with name: baseline_var1
Input tensor shape: (128, 100)
Output tensor shape: (128, 100, 20)


### Additional LSTM layer

In [None]:
# Create layers
baseline_var2_layers = [
                embedding_layer(embedding_weights=embedding_weights),
                bilstm_layer(layer_depth=1),
                bilstm_layer(layer_depth=2),
                dense_layer(last_layer=True)
]

# Create the model
baseline_var2_model = create_model('baseline_var2', 
                              baseline_var2_layers, 
                              model_compile_info)

# Check if the model can actually run
model_sanity_check(baseline_var2_model)

Model: "baseline_var2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_9 (Embedding)     (None, 100, 64)           64000     
                                                                 
 bidirectional_10 (Bidirecti  (None, 100, 128)         66048     
 onal)                                                           
                                                                 
 bidirectional_11 (Bidirecti  (None, 100, 256)         263168    
 onal)                                                           
                                                                 
 dense_8 (Dense)             (None, 100, 20)           5140      
                                                                 
Total params: 398,356
Trainable params: 398,356
Non-trainable params: 0
_________________________________________________________________
Sanity check for the model with name: baseline_

### Additional Dense layer

In [None]:
# Create layers
baseline_var3_layers = [
                embedding_layer(embedding_weights=embedding_weights),
                bilstm_layer(layer_depth=1),
                dense_layer(last_layer=False),
                dense_layer(last_layer=True)
]

# Create the model
baseline_var3_model = create_model('baseline_var3', 
                              baseline_var3_layers, 
                              model_compile_info)

# Check if the model can actually run
model_sanity_check(baseline_var3_model)

Model: "baseline_var3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_10 (Embedding)    (None, 100, 64)           64000     
                                                                 
 bidirectional_12 (Bidirecti  (None, 100, 128)         66048     
 onal)                                                           
                                                                 
 dense_9 (Dense)             (None, 100, 256)          33024     
                                                                 
 dense_10 (Dense)            (None, 100, 20)           5140      
                                                                 
Total params: 168,212
Trainable params: 168,212
Non-trainable params: 0
_________________________________________________________________
Sanity check for the model with name: baseline_var3
Input tensor shape: (128, 100)
Output tensor shape: (128, 100

# Training and Experiments

# Discussion and Error Analysis