# Inference

## Load Model

In [2]:
# Selects which checkpoint file is read further down in this cell:
# True  -> "<model name>_full_model.h5"
# False -> "<model name>.weights.h5"
TO_LOAD_FULL_MODEL = False

# Required imports
import os
import numpy as np
import pandas as pd
from keras import backend as K
from keras.models import load_model
from architectures.Seq2SeqGRU import Seq2SeqGRU
from architectures.Seq2SeqLSTM import Seq2SeqLSTM
from architectures.Seq2SeqLSTMGlove import Seq2SeqLSTMGlove
from architectures.Seq2SeqBiLSTM import Seq2SeqBiLSTM
from architectures.Seq2Seq3BiLSTM import Seq2Seq3BiLSTM
from architectures.Seq2SeqLSTMTransformer import Seq2SeqLSTMTransformer
from architectures.Seq2SeqBiLSTMImproved import Seq2SeqBiLSTMImproved
from utils import prepare_data, generate_model_name_additional_info
from tensorflow.keras.optimizers import Adam, RMSprop

# Reset the Keras backend session before anything is built in this cell.
K.clear_session()

optimizer_config = {"class": Adam, "learning_rate": 0.001}


# Architecture to run inference with (one of the classes imported above).
model_class = Seq2SeqBiLSTM

# Free-text tag plus hyperparameters. Both are passed to
# generate_model_name_additional_info() to build the name under which the
# weights file is looked up later in this cell.
# NOTE(review): the saved file name appears to list the hyperparameters in
# this exact key order - keep the order unchanged unless the helper is checked.
model_additional_info = {
    "additional_info": "",
    "hyperparameters": {
        "optimizer_class": optimizer_config["class"],
        "learning_rate": optimizer_config["learning_rate"],
        "embedding_dim": 512,
        "latent_dim": 256,
        "decoder_dropout": 0.2,
        "decoder_recurrent_dropout": 0.2,
        "encoder_dropout": 0.2,
        "encoder_recurrent_dropout": 0.2,
        "batch_size": 128,
        "epochs": 50,
    },
}

# Model name = architecture class name followed by the generated suffix.
model_info_name = model_class.__name__ + generate_model_name_additional_info(
    model_additional_info["additional_info"],
    model_additional_info["hyperparameters"],
)


# Load the data and the tokenizers.
# prepare_data() returns a 10-tuple that is unpacked positionally below; the
# names are used by the model construction and inference code later on.
# NOTE(review): presumably x_* refers to the review text and y_* to the
# summary, with *_voc being vocabulary sizes - confirm in utils.prepare_data.
(
    x_voc,
    y_voc,
    x_tokenizer,
    y_tokenizer,
    x_training_padded,
    y_training_padded,
    x_validation_padded,
    y_validation_padded,
    max_text_len,
    max_summary_len,
) = prepare_data()

# Instantiate the configured optimizer class with the configured learning rate.
optimizer = model_additional_info["hyperparameters"]["optimizer_class"](
    learning_rate=model_additional_info["hyperparameters"]["learning_rate"]
)

# Build the architecture wrapper from the prepared data and hyperparameters.
model_instance = model_class(
    x_voc=x_voc,
    y_voc=y_voc,
    max_text_len=max_text_len,
    max_summary_len=max_summary_len,
    x_tokenizer=x_tokenizer,
    y_tokenizer=y_tokenizer,
    name_additional_info=generate_model_name_additional_info(
        model_additional_info["additional_info"],
        model_additional_info["hyperparameters"],
    ),
    # Forward the size and dropout hyperparameters under their own names.
    # .get() is used so that a key missing from the dict is passed as None.
    **{
        key: model_additional_info["hyperparameters"].get(key)
        for key in (
            "latent_dim",
            "embedding_dim",
            "encoder_dropout",
            "encoder_recurrent_dropout",
            "decoder_dropout",
            "decoder_recurrent_dropout",
        )
    },
)

# Attach the optimizer, then fetch the underlying model object whose weights
# are loaded below.
model_instance.change_optimizer(optimizer)
model = model_instance.get_model()

# Candidate checkpoint file names for this model configuration.
weights_model_file_name = f"{model_info_name}.weights.h5"
full_weights_model_file_name = f"{model_info_name}_full_model.h5"

# TO_LOAD_FULL_MODEL decides which of the two files is read.
model_file_name = (
    full_weights_model_file_name if TO_LOAD_FULL_MODEL else weights_model_file_name
)

# Either file is read with load_weights() into the model obtained from
# model_instance.get_model(), i.e. the model object is not replaced.
model_path = os.path.join("results", model_class.__name__, "weights", model_file_name)
model.load_weights(model_path)

print(f"Model loaded: {model_path}")



[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Model loaded: results/Seq2SeqBiLSTM/weights/Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50.weights.h5


## Inference Model

In [4]:
NUMBER_OF_INFERENCES = 50
VERBOSE = False

import contextlib
from IPython.utils import io

# Never index past the end of the validation arrays, even when
# NUMBER_OF_INFERENCES is larger than the number of available samples.
samples_to_show = min(
    NUMBER_OF_INFERENCES, len(x_validation_padded), len(y_validation_padded)
)

# The banner is the same for every sample, so it is built once.
header_line = f"{' REVIEW ':-^100}"

for i in range(samples_to_show):
    # Unless VERBOSE is set, swallow anything printed while decoding.
    # A fresh context manager is created for each sample.
    output_guard = contextlib.nullcontext() if VERBOSE else io.capture_output()

    # Inference
    with output_guard:
        review = model_instance.seq2text(x_validation_padded[i])
        # The "<end>" marker is removed from both summaries before display.
        original = (
            model_instance.seq2summary(y_validation_padded[i])
            .replace("<end>", "")
            .strip()
        )
        # decode_sequence is given a single-sample batch of shape (1, max_text_len).
        predicted = (
            model_instance.decode_sequence(
                x_validation_padded[i].reshape(1, max_text_len)
            )
            .replace("<end>", "")
            .strip()
        )

    # Format output
    print(f"\n{header_line}")
    print(f"{'Review:':<20}", review)
    print(f"\n{'Original summary:':<20}", original)
    print(f"{'Predicted summary:':<20}", predicted)
    print("-" * 100)


---------------------------------------------- REVIEW ----------------------------------------------
Review:              perfect size refreshing drink think proof simple best would first choice drink

Original summary:    excellent choice
Predicted summary:   delicious
----------------------------------------------------------------------------------------------------

---------------------------------------------- REVIEW ----------------------------------------------
Review:              cats live outdoor come every evening give treats get super excited every time give whiskas temptations brands keep brands avoid way gave slice ham hope helps

Original summary:    we love temptations
Predicted summary:   my cats love this
----------------------------------------------------------------------------------------------------

---------------------------------------------- REVIEW ----------------------------------------------
Review:              dear raisin bran less raisin bran sincere