# Evaluating our Models

In [7]:
import pickle
import pandas as pd
import numpy as np
import keras
from keras.preprocessing.text import Tokenizer
from utils import tokenize_song, tokenize_song_by_stanza, convertSamplesToEmbeddings, read_embeddings, generate_ngram_training_samples, data_generator

In [8]:
N_GRAM = 5
BATCH_SIZE = 1000
SENTENCE_BEGIN = '<s>'
SENTENCE_END = '</s>'
NEW_LINE = 'newlinebreak'
STANZAS_FILE = "../data/processed/stanzas.txt"
FEEDFORWARD_FILE = "../models/feedforward_model.h5"
RNN_FILE = "../models/rnn_model.h5"
LSTM_FILE = "../models/lstm_model.h5"
ATTENTION_LSTM_FILE = "../models/attention_lstm_model.h5"

## Make Tokenizer and get Embeddings

In [9]:
stanzas_as_words = []
with open(STANZAS_FILE, 'r', encoding='utf-8') as txtfile:
    for line in txtfile:
        # Split each line into a list using '\t' as the separator
        line_data = line.strip().split('\t')
        stanzas_as_words.append(line_data)

tokenizer = Tokenizer(char_level=False)
tokenizer.fit_on_texts(stanzas_as_words)
# Convert stanzas into numerical indexes (list of lists of string -> list of lists of int)
stanzas = tokenizer.texts_to_sequences(stanzas_as_words)

In [10]:
index_to_embeddings = read_embeddings("../reference-materials/lyrics_embeddings.txt", tokenizer=tokenizer)

### Get Ngrams and Split in train and test

In [13]:
X, y = generate_ngram_training_samples(stanzas, N_GRAM)
split = int(0.7 * len(X))
X_train = X[:split]
y_train = y[:split]
X_test = X[split:]
y_test = y[split:]

# Load in models

In [14]:
# feedforward_model = keras.models.load_model(FEEDFORWARD_FILE)
rnn_model = keras.models.load_model(RNN_FILE)
lstm_model = keras.models.load_model(LSTM_FILE)
attention_lstm_model = keras.models.load_model(ATTENTION_LSTM_FILE)

## Evaluation Functions

In [19]:
def entropy(model, X_test, y_test, index_to_embeddings):
  data_generator_test = data_generator(X_test, y_test, index_to_embeddings, batch_size=BATCH_SIZE, sequence_length=N_GRAM, epochs=1)
  entropy = model.evaluate(data_generator_test, steps=len(X_test) // BATCH_SIZE)
  return entropy


In [20]:
rnn_model_entropy = entropy(rnn_model, X_test, y_test, index_to_embeddings)



In [23]:
rnn_perplexity = np.power(2, rnn_model_entropy[0])
print("RNN Perplexity: ", rnn_perplexity)

RNN Perplexity:  1216.099002302395
