# Generating Stanzas

In [140]:
import pickle
import pandas as pd
import numpy as np
import keras
from keras.preprocessing.text import Tokenizer
from utils import tokenize_song, tokenize_song_by_stanza, convertSamplesToEmbeddings, read_embeddings


In [141]:
# --- Special tokens that appear in the processed lyrics ---
SENTENCE_BEGIN = '<s>'
SENTENCE_END = '</s>'
NEW_LINE = 'newlinebreak'

# --- Modelling parameters ---
# N_GRAM is the default `ngram` of predict_stanza.
N_GRAM = 5
BATCH_SIZE = 1000

# --- Input data: one stanza per line, tokens separated by tabs ---
STANZAS_FILE = "../data/processed/stanzas.txt"

# --- Saved model checkpoints ---
FEEDFORWARD_FILE = "../models/feed_forward_model.h5"

# Vanilla RNN checkpoints, one per hidden-unit count.
RNN_MODEL_2_UNITS_FILE = "../models/rnn_model_2_units.h5"
RNN_MODEL_4_UNITS_FILE = "../models/rnn_model_4_units.h5"
RNN_MODEL_8_UNITS_FILE = "../models/rnn_model_8_units.h5"
RNN_MODEL_16_UNITS_FILE = "../models/rnn_model_16_units.h5"
RNN_MODEL_32_UNITS_FILE = "../models/rnn_model_32_units.h5"
RNN_MODEL_64_UNITS_FILE = "../models/rnn_model_64_units.h5"

LSTM_FILE = "../models/lstm_model.h5"
ATTENTION_LSTM_FILE = "../models/attention_lstm_model.h5"

In [180]:
# Restore every trained network from its checkpoint file.
load_model = keras.models.load_model

# Feed-forward baseline.
feedforward_model = load_model(FEEDFORWARD_FILE)

# Vanilla RNNs, one per hidden-unit count.
rnn_model_2_units = load_model(RNN_MODEL_2_UNITS_FILE)
rnn_model_4_units = load_model(RNN_MODEL_4_UNITS_FILE)
rnn_model_8_units = load_model(RNN_MODEL_8_UNITS_FILE)
rnn_model_16_units = load_model(RNN_MODEL_16_UNITS_FILE)
rnn_model_32_units = load_model(RNN_MODEL_32_UNITS_FILE)
rnn_model_64_units = load_model(RNN_MODEL_64_UNITS_FILE)

# LSTM variants.
lstm_model = load_model(LSTM_FILE)
attention_lstm_model = load_model(ATTENTION_LSTM_FILE)



### Make tokenizer

In [143]:
# Each line of the stanzas file is one stanza; its tokens are separated by tabs.
with open(STANZAS_FILE, 'r', encoding='utf-8') as txtfile:
    stanzas_as_words = [raw_line.strip().split('\t') for raw_line in txtfile]

In [144]:
# Word-level tokenizer (char_level=False): every token of a stanza gets its own integer index.
tokenizer = Tokenizer(char_level=False)
# Build the vocabulary from the pre-split stanzas read above.
tokenizer.fit_on_texts(stanzas_as_words)
# Convert stanzas into numerical indexes (list of lists of string -> list of lists of int)
stanzas = tokenizer.texts_to_sequences(stanzas_as_words)

In [145]:
# Sanity check: show the first stanza as a list of vocabulary indexes.
print(stanzas[0])

[530, 13, 13, 13, 541, 11, 24779, 35, 2112, 1, 2389, 24779, 35, 305, 1, 367, 1689, 200, 100, 3580, 16, 20, 2537, 1, 412, 902, 27401, 1737, 1, 77, 4, 610, 715, 4, 1685, 6, 4, 805, 1, 15, 418, 7, 445, 11, 626, 84, 5, 598, 10, 1, 1, 2, 22, 73, 1678, 29, 933, 1, 2, 67, 37, 30, 17, 933, 16, 5, 1, 2, 22, 73, 1678, 29, 933, 1, 2, 22, 113, 282, 29, 4, 401, 1, 17, 4, 302, 1086, 1, 6, 624, 5, 93, 1, 1, 4772, 26, 10583, 3, 196, 1, 2827, 26, 190, 496, 4, 2074, 1, 16, 46, 439, 2, 87, 106, 773, 181, 5, 1, 268, 52, 16, 20, 431, 1, 250, 2, 528, 77, 4, 761, 10080, 1, 7, 5, 47, 3974, 4, 833, 715, 20, 2587, 1, 1, 2, 22, 73, 1678, 29, 933, 1, 2, 67, 37, 30, 17, 933, 16, 5, 1, 2, 22, 73, 1678, 29, 933, 1, 2, 22, 113, 282, 29, 4, 401, 1, 17, 4, 302, 1086, 1, 6, 624, 5, 93, 1, 1, 2, 22, 73, 1678, 29, 933, 1, 2, 22, 73, 1678, 29, 933, 1, 2, 22, 113, 282, 29, 4, 401, 1, 17, 4, 302, 1086, 6, 1790, 611, 1, 14, 14, 14]


### Get Index to Embeddings

In [146]:
# Load the embedding lookup that the prediction functions pass to convertSamplesToEmbeddings.
# NOTE(review): presumably maps each tokenizer index to its embedding vector — confirm in utils.read_embeddings.
index_to_embeddings = read_embeddings("../reference-materials/lyrics_embeddings.txt", tokenizer=tokenizer)

### Prediction Function

In [213]:
def predict_word(model, tokenizer, index_to_embedding, last_words):
    """
    Sample the index of the next word from the model's predicted distribution.

    Args:
        model: object with a ``predict`` method; it is called on the embedded
            context and the first row of its result is used as the
            (possibly unnormalized) weights over output indexes.
        tokenizer: unused here; kept so existing callers keep working.
        index_to_embedding: lookup forwarded to ``convertSamplesToEmbeddings``.
        last_words: sequence of token indexes forming the context window.

    Returns:
        int: the sampled index, drawn with probability proportional to its weight.

    Raises:
        ValueError: if the predicted weights do not have a positive, finite sum
            (or if ``np.random.choice`` rejects them, e.g. negative entries).
    """
    model_input = convertSamplesToEmbeddings([last_words], index_to_embedding)

    # Normalize in float64: np.random.choice requires p to sum to 1 within a
    # tight tolerance, which lower-precision model outputs (e.g. float32) can miss.
    weights = np.asarray(model.predict(model_input)[0], dtype=np.float64)
    total = weights.sum()
    if not np.isfinite(total) or total <= 0:
        raise ValueError("model produced a degenerate distribution (sum={})".format(total))
    probabilities = weights / total

    # The global NumPy RNG is used, so np.random.seed still controls sampling.
    return int(np.random.choice(len(probabilities), p=probabilities))

def predict_stanza(model, tokenizer, index_to_embedding, genre, ngram=N_GRAM, max_length=40):
    """
    Generate one stanza, word by word, conditioned on a genre token.

    Each step feeds ``predict_word`` the genre index followed by the last
    ``ngram - 2`` generated indexes. Generation starts from ``ngram - 2``
    sentence-begin markers and stops once the sentence-end marker is sampled
    or the stanza holds ``max_length`` tokens (begin markers included).

    Args:
        model: model forwarded to ``predict_word``.
        tokenizer: fitted tokenizer exposing ``word_index`` and ``index_word``.
        index_to_embedding: embedding lookup forwarded to ``predict_word``.
        genre: genre token; must be a key of ``tokenizer.word_index``.
        ngram: n-gram order; the context window holds ``ngram - 2`` tokens.
        max_length: maximum number of tokens in the returned stanza.

    Returns:
        list of str: the generated tokens, including the leading begin markers
        and, if it was sampled, the trailing end marker.

    Raises:
        ValueError: if ``ngram`` is smaller than 3 (the context would be empty).
        KeyError: if ``genre`` or a special token is not in the vocabulary, or
            if a sampled index has no entry in ``tokenizer.index_word``.
    """
    context_size = ngram - 2
    if context_size < 1:
        raise ValueError("ngram must be at least 3, got {}".format(ngram))

    # Vocabulary lookups do not change during generation, so do them once.
    word_index = tokenizer.word_index
    genre_index = word_index[genre]
    begin_index = word_index[SENTENCE_BEGIN]
    end_index = word_index[SENTENCE_END]

    stanza = [begin_index] * context_size
    while stanza[-1] != end_index and len(stanza) < max_length:
        model_input = [genre_index] + stanza[-context_size:]
        stanza.append(predict_word(model, tokenizer, index_to_embedding, model_input))

    return [tokenizer.index_word[index] for index in stanza]

In [189]:
def print_stanza(stanza):
    """
    Print a generated stanza, one lyric line per output line.

    Sentence begin/end markers are dropped and every ``NEW_LINE`` token ends
    the current output line. Words that follow the last ``NEW_LINE`` token are
    printed as a final line instead of being discarded.

    Args:
        stanza: iterable of word tokens (str), as returned by ``predict_stanza``.

    Returns:
        None. The stanza is written to standard output.
    """
    words = [word for word in stanza if word not in (SENTENCE_BEGIN, SENTENCE_END)]
    line = []
    saw_line_break = False
    for word in words:
        if word == NEW_LINE:
            print(' '.join(line))
            line = []
            saw_line_break = True
        else:
            line.append(word)
    # Flush whatever is left after the last line break. A stanza without any
    # line break still prints exactly one (possibly empty) line.
    if line or not saw_line_break:
        print(' '.join(line))


## RNN Model Generation

In [161]:
# Sample one pop-conditioned stanza from the 16-unit RNN and print it line by line.
rnn_predicted_stanza_pop = predict_stanza(
    model=rnn_model_16_units,
    tokenizer=tokenizer,
    index_to_embedding=index_to_embeddings,
    genre='pop',
)
print('RNN 16 units Generated Stanza (pop):')
print_stanza(rnn_predicted_stanza_pop)

RNN 16 units Generated Stanza (pop):
i saw you smile , your 'd do more than your

i know you 've got a sun inside


In [198]:
# Draw a second pop-conditioned sample from the 16-unit RNN (sampling is stochastic).
# The variable is named after the genre actually requested; it was previously
# called `..._rock` although both the genre and the label are pop.
rnn_predicted_stanza_pop_resampled = predict_stanza(rnn_model_16_units, tokenizer, index_to_embeddings, 'pop')
print('RNN 16 units Generated Stanza (pop):')
print_stanza(rnn_predicted_stanza_pop_resampled)

RNN 16 units Generated Stanza (pop):
ports-of-call long ,
as mary
you you were saying

she paints sail )
the heaven is selling


In [205]:
# Sample one hip_hop-conditioned stanza from the 16-unit RNN.
# The variable is named after the genre actually requested (it used to be `..._rock`).
rnn_predicted_stanza_hip_hop = predict_stanza(rnn_model_16_units, tokenizer, index_to_embeddings, 'hip_hop')
print('RNN 16 units Generated Stanza (hip_hop):')
print_stanza(rnn_predicted_stanza_hip_hop)

RNN 16 units Generated Stanza (hip_hop):
move , that will people
i saw you true , you ... off good my gray gray those as , , off
bella , will sway


In [210]:
# Sample one pop-conditioned stanza from the 2-unit RNN and print it line by line.
rnn_predicted_stanza_pop = predict_stanza(
    model=rnn_model_2_units,
    tokenizer=tokenizer,
    index_to_embedding=index_to_embeddings,
    genre='pop',
)
print('RNN 2 units Generated Stanza (pop):')
print_stanza(rnn_predicted_stanza_pop)

RNN 2 units Generated Stanza (pop):







































In [212]:
# Sample one rock-conditioned stanza from the 2-unit RNN.
rnn_predicted_stanza_rock = predict_stanza(
    model=rnn_model_2_units,
    tokenizer=tokenizer,
    index_to_embedding=index_to_embeddings,
    genre='rock',
)
print('RNN 2 units Generated Stanza (rock):')
# Print the raw token list (not print_stanza) so marker tokens stay visible.
print(rnn_predicted_stanza_rock)

RNN 2 units Generated Stanza (rock):
['<s>', '<s>', '<s>', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak', 'newlinebreak']


## LSTM Model Generation

In [171]:
# Sample one pop-conditioned stanza from the LSTM model and print it line by line.
lstm_predicted_stanza_pop = predict_stanza(
    model=lstm_model,
    tokenizer=tokenizer,
    index_to_embedding=index_to_embeddings,
    genre='pop',
)
print('LSTM 2 units Generated Stanza (pop):')
print_stanza(lstm_predicted_stanza_pop)

LSTM 2 units Generated Stanza (pop):
love burn a ] today
the push the time

they hook up saw gray now need you 've
that
earphone
the , silent crazy
are
you


In [174]:
# Sample one rock-conditioned stanza from the LSTM model and print it line by line.
lstm_predicted_stanza_rock = predict_stanza(
    model=lstm_model,
    tokenizer=tokenizer,
    index_to_embedding=index_to_embeddings,
    genre='rock',
)
print('LSTM 2 units Generated Stanza (rock):')
print_stanza(lstm_predicted_stanza_rock)

LSTM 2 units Generated Stanza (rock):
you know operate , you start your blade doing ...
in know my brooks long up street afonud his trailor-park love a were find you inside are be you 5ths change you


## LSTM Model with Attention Generation

In [194]:
# Sample one pop-conditioned stanza from the attention LSTM and print it line by line.
lstm_attention_predicted_stanza_pop = predict_stanza(
    model=attention_lstm_model,
    tokenizer=tokenizer,
    index_to_embedding=index_to_embeddings,
    genre='pop',
)
print('LSTM Attention 2 units Generated Stanza (pop):')
print_stanza(lstm_attention_predicted_stanza_pop)

LSTM Attention 2 units Generated Stanza (pop):
sirens bind ringing naive jump n't 're never 'll expect came proceeeeeeed chopper puttered mine


In [197]:
# Sample one country-conditioned stanza from the attention LSTM.
# The genre passed to the model is 'country', so the variable name and the
# printed label now say so (both previously said "rock").
lstm_attention_predicted_stanza_country = predict_stanza(attention_lstm_model, tokenizer, index_to_embeddings, 'country')
print('LSTM Attention 2 units Generated Stanza (country):')
print_stanza(lstm_attention_predicted_stanza_country)

LSTM Attention 2 units Generated Stanza (rock):
'll expect 's muthafukin ah-haa yeah.newlinebreak agonewlinebreak eagles did singing this diddy-wah-diddy did gun sirens drive 'll expect skybox puberty 'new rothschild eccederin selflessly biased patient pokin where where 'll expect 'll expect broonzy drown aisy leonard
