# GRU

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing
import pickle
from numpy import asarray

from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, f1_score, auc, roc_curve, RocCurveDisplay, confusion_matrix, classification_report
from sklearn.multiclass import OneVsRestClassifier
from itertools import cycle

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Embedding, concatenate, GRU, Dense
from keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers.legacy import Adam, SGD, RMSprop, Adagrad

import skopt
from skopt import gbrt_minimize, gp_minimize
from skopt.utils import use_named_args
from skopt.space import Real, Categorical, Integer  
from tensorflow.keras import backend


# DATASET
DATASET_COLUMNS = ['Id', 'Review', 'Sentiment']
# Define a dictionary to map sentiment values to category names
senti_labels = {1: 'Negative', 2: 'Neutral', 3: 'Positive'}
senti_categories = list(senti_labels.values())
NUM_of_CLASSES = 3

input_folder_path = "./pls/Thesis_Jupyter_Final/input/"
processed_folder_path = "./pls/Thesis_Jupyter_Final/processed"

In [2]:
train = pd.read_csv(os.path.join(input_folder_path, "train.csv"))
val = pd.read_csv(os.path.join(input_folder_path, "val.csv"))
test = pd.read_csv(os.path.join(input_folder_path, "test.csv"))

x_train = train['x']
y_train = train['y']
x_val = val['x']
y_val = val['y']
x_test = test['x']
y_test = test['y']

x_train_encoded = np.load(os.path.join(processed_folder_path, "train_encoded_x.npy"))
y_train_encoded = np.load(os.path.join(processed_folder_path, "train_encoded_y.npy"))
x_val_encoded = np.load(os.path.join(processed_folder_path, "val_encoded_x.npy"))
y_val_encoded = np.load(os.path.join(processed_folder_path, "val_encoded_y.npy"))
x_test_encoded = np.load(os.path.join(processed_folder_path, "test_encoded_x.npy"))
y_test_encoded = np.load(os.path.join(processed_folder_path, "test_encoded_y.npy"))

w2v_embedding_vectors = np.load(os.path.join(processed_folder_path, "embedding_w2v_matrix.npy"))
print(w2v_embedding_vectors)

%store -r embedding_vocab_size
%store -r EMBEDDING_DIM
%store -r max_seq_length

[[ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.22519    -0.34231001  0.049165   ...  0.38266    -0.14099
  -0.14488   ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 ...
 [ 0.38699001  0.37981001  0.12822001 ...  0.44394001  0.27914
  -0.27467999]
 [ 0.82581002 -0.17398    -0.36208999 ...  0.01873    -0.34252
  -0.49366999]
 [ 0.064471    0.83850998 -0.22317    ...  0.028637    0.63722003
  -0.78961998]]


## Evaluation Functions

In [3]:
def calculate_metrics(score):
    acc =  score[1]
    loss = score[0]

    print(f"Accuracy: {acc:.2%}")
    print(f"Loss: {loss:.2f}")
    
    return acc, loss

In [4]:
def calculate_classification_report(y, y_pred, labels):
    report = classification_report(y, y_pred, labels=labels)
    print("Classification Report:\n", report)

In [5]:
def plot_confusion_matrix(y_true, y_pred, labels):
    cnf_mat = confusion_matrix(y_true, y_pred)
    mat_disp = ConfusionMatrixDisplay(confusion_matrix=cnf_mat, display_labels=labels)
    mat_disp = mat_disp.plot(cmap='Blues', xticks_rotation='vertical')
    plt.title(f'Confusion Matrix')
    plt.show()

In [6]:
def evaluate_model(model, model_name, x_encoded, y_encoded, y=None, only_metrics=True):    
    y_pred_prob = model.predict(x_encoded)

    print(f"*{model_name}")
    
    score = model.evaluate(x_encoded, y_encoded, verbose=0)
    calculate_metrics(score)
    
    senti_labels = ['negative', 'neutral', 'positive'] #TODO: to constants
    
    if not only_metrics:
        y_pred = np.argmax(y_pred_prob, axis=1) + 1
        calculate_classification_report(y, y_pred, labels=senti_labels)
        plot_confusion_matrix(y, y_pred, labels=senti_labels)
    
    print()

In [7]:
def one_hot_encode(y):
    y_encoded = np.zeros((len(y), NUM_of_CLASSES))
    for i, label in enumerate(y):
        y_encoded[i, label - 1] = 1

    return y_encoded

In [8]:
def plot_roc_curve(prob_test_vec, y_test, labels):
    fig, ax = plt.subplots(figsize=(10, 10))
    labels = labels
    colors = cycle(['limegreen', 'dodgerblue', 'red'])
    for senti, color in zip(range(NUM_of_CLASSES), colors):
        RocCurveDisplay.from_predictions(
            y_test[:, senti],
            prob_test_vec[:, senti],
            name=f"ROC curve for {labels[senti]}",
            color=color,
            ax=ax,
        )

In [9]:
def calculate_OvR_roc_auc_score(model, x, y, x_test, y_test, labels): #average??
    #y = one_hot_encode(y)
    #y_test = one_hot_encode(y_test)

    ovr_model = OneVsRestClassifier(model).fit(x, y)
    prob_test_vec = ovr_model.predict_proba(x_test)
    
    fpr, tpr, thresholds, auc_score = [], [], [], []
    for _ in range(NUM_of_CLASSES):
        fpr.append(0)
        tpr.append(0)
        thresholds.append(0)
        auc_score.append(0)
    
    for i in range(NUM_of_CLASSES):
        fpr[i], tpr[i], thresholds[i] = roc_curve(y_test[:, i], prob_test_vec[:, i])
        auc_score[i] = auc(fpr[i], tpr[i])

    print(f"AUC score: {auc_score}")
    averaged_auc_score = (sum(auc_score) / NUM_of_CLASSES)
    print(f"Averaged AUC score: {averaged_auc_score:.2f}")
    
    plot_roc_curve(prob_test_vec, y_test, labels)

In [10]:
def plot_development(history):
    acc =  history.history['accuracy']
    val_acc = history.history['val_accuracy']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(len(acc))
    
    plt.plot(epochs, acc, 'b', label='Training Accuracy')
    plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()
    
    plt.figure()
    
    plt.plot(epochs, loss, 'b', label='Training Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.title('Training and validation Loss')
    plt.legend()
    
    plt.show()

Making a basic neural net to see the baseline for accuracy with minimum tuning.

In [11]:
num_output_classes = 3
batch_size= 32
epochs=30

In [12]:
model = Sequential()
# input layer is sequence of integers (words)
model.add(Embedding(embedding_vocab_size, EMBEDDING_DIM, input_length=max_seq_length, name="embedding_layer")) # part of input layer as it transforms integers into dense vectors, input shape = (None, max_seq_length)
model.add(GRU(64, name='hidden_layer')) # hidden layer
model.add(Dense(num_output_classes, activation='softmax', name="output_layer"))
model.compile(optimizer = 'adam', loss='categorical_crossentropy', metrics=["accuracy"])
model.summary()

basic_history = model.fit(x_train_encoded, y_train_encoded, validation_data=(x_val_encoded, y_val_encoded), batch_size=batch_size, epochs=10)

accuracy = model.evaluate(x_test_encoded, y_test_encoded)[1]
print(f"Naive model Accuracy: {accuracy:.2f}")

del model

backend.clear_session()
tf.compat.v1.reset_default_graph()

2023-06-09 15:57:19.176721: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/amd/zen3/software/snappy/1.1.9-GCCcore-11.3.0/lib:/cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/amd/zen3/software/nsync/1.25.0-GCCcore-11.3.0/lib:/cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/amd/zen3/software/LMDB/0.9.29-GCCcore-11.3.0/lib:/cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/amd/zen3/software/JsonCpp/1.9.5-GCCcore-11.3.0/lib:/cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/amd/zen3/software/ICU/71.1-GCCcore-11.3.0/lib:/cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/amd/zen3/software/giflib/5.2.1-GCCcore-11.3.0/lib:/cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/amd/zen3/software/flatbuffers/2.0.7-GCCcore-11.3.0/lib:/cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/amd/ze

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_layer (Embedding)  (None, 13, 100)          1100      
                                                                 
 hidden_layer (GRU)          (None, 64)                31872     
                                                                 
 output_layer (Dense)        (None, 3)                 195       
                                                                 
Total params: 33,167
Trainable params: 33,167
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Naive model Accuracy: 0.65


For our naive model, we get an accuracy of x%. # TODO: x

## Hypterparameter Tuning

In [13]:
num_gru_layers = Integer(low=1, high=5, name='num_gru_layers')
num_gru_units = Integer(low=32, high=256, name='num_gru_units') # TODO: step Keras Tuner
learning_rate = Real(low=1e-4, high=1e-2, prior='log-uniform', name='learning_rate')
adam_decay = Real(low=1e-6,high=1e-2,name="adam_decay")
#batch_size = Integer(low=1, high=128, name='batch_size')

search_space = [
            num_gru_layers,
            num_gru_units,
            learning_rate,
            adam_decay
            ]

# Specify one or more initial points for the search of optimal parameter
default_params = [1,
                  32,
                  1e-3, 
                  1e-3,
                 ]

In [14]:
def define_gru_model(num_gru_layers, num_gru_units, learning_rate, adam_decay):
    # Start the model making process and create our first layer
    model = Sequential()
    model.add(Embedding(embedding_vocab_size, EMBEDDING_DIM, input_length=max_seq_length))

    # Create a loop making a new GRU layer for the amount passed to this model.
    # Naming the layers helps avoid tensorflow error deep in the stack trace.
    for i in range(num_gru_layers):
        name = 'layer_gru_{0}'.format(i+1)
        if i < num_gru_layers-1:
            model.add(GRU(num_gru_units, return_sequences=True, name=name))
        else:
            model.add(GRU(num_gru_units, return_sequences=False, name=name))

    # Add our classification layer.
    model.add(Dense(num_output_classes, activation='softmax'))

    # Setup our optimizer and compile
    adam = Adam(learning_rate=learning_rate, decay=adam_decay)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    
    return model

In [15]:
@use_named_args(dimensions=search_space)
def objective_function(num_gru_layers, num_gru_units, learning_rate, adam_decay):

    model = define_gru_model(num_gru_layers=num_gru_layers,
                         num_gru_units=num_gru_units,
                         learning_rate=learning_rate,
                         adam_decay=adam_decay
                         )

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(x_train_encoded,
                        y_train_encoded,
                        validation_data=(x_val_encoded, y_val_encoded),
                        epochs=epochs, # TODO
                        batch_size=batch_size,
                        callbacks=[early_stopping]
                        )
    #return the validation accuracy for the last epoch.
    accuracy = history.history['val_accuracy'][-1]
    loss = history.history['val_loss'][-1]
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Loss: {loss:.2}\n")


    # Delete the Keras model with these hyper-parameters from memory.
    del model
    
    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    backend.clear_session()
    tf.compat.v1.reset_default_graph()
    
    # the optimizer aims for the lowest score, so we return our negative accuracy
    return -accuracy

## Gaussian Process Model

In [16]:
gp_result = gp_minimize(func=objective_function,
                            dimensions=search_space,
                            n_calls=12,
                            noise= 0.01,
                            n_jobs=-1,
                            kappa = 5,
                            x0=default_params)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 13, 100)           1100      
                                                                 
 layer_gru_1 (GRU)           (None, 32)                12864     
                                                                 
 dense (Dense)               (None, 3)                 99        
                                                                 
Total params: 14,063
Trainable params: 14,063
Non-trainable params: 0
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Accuracy: 80.12%
Loss: 0.67

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 13, 100)           110

In [None]:
# TODO data frame summarizing parameter search
gp_best_params = {param.name: value for param, value in zip(gp_result.space, gp_result.x)}
print("Best Hyperparameters:", gp_best_params)

In [None]:
model = define_gru_model(gp_best_params['num_gru_layers'], 
                          gp_best_params['num_gru_units'], 
                          gp_best_params['learning_rate'], 
                          gp_best_params['adam_decay']
                          )

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True) # TODO: should I, again?
history = model.fit(x_train_encoded,
                    y_train_encoded,
                    validation_data=(x_val_encoded, y_val_encoded),
                    epochs=epochs, # TODO
                    batch_size=batch_size,
                    callbacks=[early_stopping]
                    )
plot_development(history)

In [None]:
model.evaluate(x_train_encoded, y_train_encoded)
evaluate_model(model, "Train single-GRU", x_train_encoded, y_train_encoded, only_metrics=True)

model.evaluate(x_val_encoded, y_val_encoded, verbose=0)
evaluate_model(model, "Val single-GRU", x_val_encoded, y_val_encoded, only_metrics=True)

model.evaluate(x_test_encoded, y_test_encoded, verbose=0)
evaluate_model(model, "Test single-GRU", x_test_encoded, y_test_encoded, y_test, only_metrics=False)
senti_labels = ['negative', 'neutral', 'positive'] # TODO
#calculate_OvR_roc_auc_score(model, x_train, y_train, x_test, y_test, senti_labels)

## Gradient Boosted Model

In [None]:
gbrt_result = gbrt_minimize(func=objective_function,
                            dimensions=search_space,
                            n_calls=12,
                            n_jobs=-1,
                            x0=default_params)

In [None]:
# TODO data frame summarizing parameter search
gbrt_best_params = {param.name: value for param, value in zip(gbrt_result.space, gbrt_result.x)}
print("Best Hyperparameters:", gbrt_best_params)

In [None]:
model = define_gru_model(gbrt_best_params['num_gru_layers'], 
                          gbrt_best_params['num_gru_units'], 
                          gbrt_best_params['learning_rate'], 
                          gbrt_best_params['adam_decay']
                          )

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True) # TODO: should I, again?
history = model.fit(x_train_encoded,
                    y_train_encoded,
                    validation_data=(x_val_encoded, y_val_encoded),
                    epochs=epochs, # TODO
                    batch_size=batch_size,
                    callbacks=[early_stopping]
                    )
plot_development(history)

In [None]:
model.evaluate(x_train_encoded, y_train_encoded)
evaluate_model(model, "Train single-GRU", x_train_encoded, y_train_encoded, only_metrics=True)

model.evaluate(x_val_encoded, y_val_encoded, verbose=0)
evaluate_model(model, "Val single-GRU", x_val_encoded, y_val_encoded, only_metrics=True)

model.evaluate(x_test_encoded, y_test_encoded, verbose=0)
evaluate_model(model, "Test single-GRU", x_test_encoded, y_test_encoded, y_test, only_metrics=False)
senti_labels = ['negative', 'neutral', 'positive'] # TODO
#calculate_OvR_roc_auc_score(model, x_train, y_train, x_test, y_test, senti_labels)

# Multi-Input

In [None]:
num_gru_layersA = Integer(low=1, high=5, name='num_gru_layersA')
num_gru_layersB = Integer(low=1, high=5, name='num_gru_layersB')
num_gru_unitsA = Integer(low=32, high=256, name='num_gru_unitsA')
num_gru_unitsB = Integer(low=32, high=256, name='num_gru_unitsB')

search_space = [
            num_gru_layersA,
            num_gru_layersB,
            num_gru_unitsA,
            num_gru_unitsB,
            learning_rate,
            adam_decay
            ]

# Specify one or more initial points for the search of optimal parameter
default_params = [1, 
                  1, 
                  32,
                  32, 
                  1e-3,
                  1e-3 
                  ]

In [None]:
def define_multi_channel_gru_model(num_gru_layersA, num_gru_layersB, num_gru_unitsA, num_gru_unitsB, learning_rate, adam_decay):
    # Vocabulary-based embedding layer
    inputsA = Input(shape=(max_seq_length,), name="input regular embeddings")
    # Word2Vec embedding layer
    inputsB = Input(shape=(max_seq_length,), name="input word2vec embeddings")
    
    # Define an embedding layer for each input
    embeddingsA = Embedding(embedding_vocab_size, EMBEDDING_DIM, input_length=max_seq_length, name="embeddingsA")(inputsA)
    embeddingsB = Embedding(embedding_vocab_size, EMBEDDING_DIM, input_length=max_seq_length, weights=[w2v_embedding_vectors], trainable=False, name="embeddingsB")(inputsB)
    
    # Pass both embeddings through their own GRU layers
    gru_layersA = embeddingsA
    for i in range(num_gru_layersA):
        nameA = 'layer_gruA_{0}'.format(i+1)
        if i < num_gru_layers-1:
            gru_layersA = GRU(num_gru_unitsA, return_sequences=True, name=nameA)(gru_layersA)
        else:
            gru_layersA = GRU(num_gru_unitsA, return_sequences=False, name=nameA)(gru_layersA)
        
    gru_layersB = embeddingsB
    for i in range(num_gru_layersB):
        nameA = 'layer_gruB_{0}'.format(i+1)
        if i < num_gru_layers-1:
            gru_layersA = GRU(num_gru_unitsB, return_sequences=True, name=nameA)(gru_layersB)
        else:
            gru_layersA = GRU(num_gru_unitsB, return_sequences=False, name=nameA)(gru_layersB)
        

    # Concatenate the two inputs
    merged = concatenate([gru_layersA, gru_layersB])

    # Dense layer for the merged inputs & output Layer
    outputs = Dense(num_output_classes, activation='softmax', name="output")(merged)

    # Create the model
    model = Model(inputs=[inputsA, inputsB], outputs=outputs)

    # Compile the model
    adam = Adam(learning_rate=learning_rate, decay=adam_decay)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    print(model.summary())

    return model

In [None]:
@use_named_args(dimensions=search_space)
def multi_objective_function(num_gru_layersA, num_gru_layersB, num_gru_unitsA, num_gru_unitsB, learning_rate, adam_decay, batch_size):

    model = define_multi_channel_gru_model(num_gru_layersA=num_gru_layersA,
                                            num_gru_layersB=num_gru_layersB,
                                            num_gru_unitsA=num_gru_unitsA,
                                            num_gru_unitsB=num_gru_unitsB,
                                            learning_rate=learning_rate,
                                            adam_decay=adam_decay
                                            )

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(x_train_encoded,
                        y_train_encoded,
                        validation_data=(x_val_encoded, y_val_encoded),
                        epochs=epochs, # TODO
                        batch_size=batch_size,
                        callbacks=[early_stopping]
                        )
    #return the validation accuracy for the last epoch.
    accuracy = history.history['val_accuracy'][-1]
    loss = history.history['val_loss'][-1]

    # Print the classification accuracy.
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Loss: {loss:.2}\n")


    # Delete the Keras model with these hyper-parameters from memory.
    del model
    
    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    backend.clear_session()
    tf.compat.v1.reset_default_graph()
    
    # the optimizer aims for the lowest score, so we return our negative accuracy
    return -accuracy

## Gaussian Process Model

In [None]:
gp_result = gp_minimize(func=multi_objective_function,
                            dimensions=search_space,
                            n_calls=12,
                            noise= 0.01,
                            n_jobs=-1,
                            kappa = 5,
                            x0=default_params)

In [None]:
# TODO data frame summarizing parameter search
gp_best_params = {param.name: value for param, value in zip(gp_result.space, gp_result.x)}
print("Best Hyperparameters:", gp_best_params)

In [None]:
model = define_multi_channel_gru_model(gp_best_params['num_gru_layersA'],
                                        gp_best_params['num_gru_layersB'],
                                        gp_best_params['num_gru_unitsA'], 
                                        gp_best_params['num_gru_unitsB'],
                                        gp_best_params['learning_rate'], 
                                        gp_best_params['adam_decay']
                                        )

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True) # TODO: should I, again?
history = model.fit(x_train_encoded,
                        y_train_encoded,
                        validation_data=(x_val_encoded, y_val_encoded),
                        epochs=epochs, # TODO
                        batch_size=batch_size,
                        callbacks=[early_stopping]
                        )
plot_development(history)

In [None]:
model.evaluate(x_train_encoded, y_train_encoded)
evaluate_model(model, "Train multi-GRU", x_train_encoded, y_train_encoded, only_metrics=True)

model.evaluate(x_val_encoded, y_val_encoded, verbose=0)
evaluate_model(model, "Val multi-GRU", x_val_encoded, y_val_encoded, only_metrics=True)

model.evaluate(x_test_encoded, y_test_encoded, verbose=0)
evaluate_model(model, "Test multi-GRU", x_test_encoded, y_test_encoded, y_test, only_metrics=False)
senti_labels = ['negative', 'neutral', 'positive'] # TODO
#calculate_OvR_roc_auc_score(model, x_train, y_train, x_test, y_test, senti_labels)

## Gradient Boosted Model

In [None]:
gbrt_result = gbrt_minimize(func=objective_function,
                            dimensions=search_space,
                            n_calls=12,
                            n_jobs=-1,
                            x0=default_params)

In [None]:
# TODO data frame summarizing parameter search
gbrt_best_params = {param.name: value for param, value in zip(gbrt_result.space, gbrt_result.x)}
print("Best Hyperparameters:", gbrt_best_params)

In [None]:
model = define_multi_channel_gru_model(gbrt_best_params['num_gru_layersA'],
                                        gbrt_best_params['num_gru_layersB'],
                                        gbrt_best_params['num_gru_unitsA'], 
                                        gbrt_best_params['num_gru_unitsB'],
                                        gbrt_best_params['learning_rate'], 
                                        gbrt_best_params['adam_decay'],
                                        )

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True) # TODO: should I, again?
history = model.fit(x_train_encoded,
                        y_train_encoded,
                        validation_data=(x_val_encoded, y_val_encoded),
                        epochs=20, # TODO
                        batch_size=batch_size,
                        callbacks=[early_stopping]
                        )
plot_development(history)

In [None]:
model.evaluate(x_train_encoded, y_train_encoded)
evaluate_model(model, "Train multi-GRU", x_train_encoded, y_train_encoded, only_metrics=True)

model.evaluate(x_val_encoded, y_val_encoded, verbose=0)
evaluate_model(model, "Val multi-GRU", x_val_encoded, y_val_encoded, only_metrics=True)

model.evaluate(x_test_encoded, y_test_encoded, verbose=0)
evaluate_model(model, "Test multi-GRU", x_test_encoded, y_test_encoded, y_test, only_metrics=False)
senti_labels = ['negative', 'neutral', 'positive'] # TODO
#calculate_OvR_roc_auc_score(model, x_train, y_train, x_test, y_test, senti_labels)