## Using an LSTM to try to generate new monkeys' songs.
First, we import everything we'll need.

In [2]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import pandas as pd
import random
import sys
import io
import string, re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
import itertools

Cleaning the text...

In [3]:
def remove_punc(lyrics):
    """Return `lyrics` with every character listed in `string.punctuation` dropped.

    Characters are kept in their original order and re-joined into one string.
    """
    kept = []
    for ch in lyrics:
        if ch not in string.punctuation:
            kept.append(ch)
    return "".join(kept)
def remove_stopwords(lyrics):
    """Return the tokens of `lyrics` that are not English stop words.

    Args:
        lyrics: iterable of string tokens.

    Returns:
        A list with the tokens that are absent from NLTK's English
        stop-word list, in their original order.
    """
    # Load the stop-word list once per call instead of once per token
    # (the original re-evaluated stopwords.words('english') inside the
    # comprehension); a frozenset makes each membership test O(1).
    english_stopwords = frozenset(stopwords.words('english'))
    return [w for w in lyrics if w not in english_stopwords]

# Tokens that are filtered out of the tokenized lyrics by remove_markers.
markers = [
    '[', ']', 'Verse', '1', '2', '3', 'Chorus', 'Spoken Intro', 'Intro',
    'Bridge', 'PreChorus', 'and', 'And', 'Outro',
]


def remove_markers(lyrics):
    """Return the tokens of `lyrics` that do not appear in `markers`.

    Comparison is an exact, case-sensitive match against each entry of the
    module-level `markers` list; token order is preserved.
    """
    kept = []
    for token in lyrics:
        if token in markers:
            continue
        kept.append(token)
    return kept

# Load the lyrics table and prepare a tokenizer that splits on runs of
# word characters.
lyrics_df = pd.read_csv('data/lyrics.csv')
tokenizer = RegexpTokenizer(r'\w+')

# Clean every song in three passes: strip punctuation, split into tokens,
# then drop the tokens listed in `markers`.
lyrics_df['lyrics'] = (
    lyrics_df['lyrics']
    .apply(remove_punc)
    .apply(tokenizer.tokenize)
    .apply(remove_markers)
)

Formatting the text for the LSTM...

In [8]:
def format_lyrics(lyrics, maxlen, step):
    """Turn tokenized lyrics into one-hot character windows for next-char training.

    All tokens of all songs are joined with single spaces into one text.
    The text is then cut into overlapping windows of `maxlen` characters,
    taken every `step` characters; the character right after each window is
    its training target.

    Args:
        lyrics: iterable of token lists (one list of strings per song).
        maxlen: number of characters in each input window.
        step: stride, in characters, between the starts of consecutive windows.

    Returns:
        A tuple `(chars, char_indices, indices_char, x, y, text)` where
        `chars` is the sorted list of distinct characters, `char_indices` /
        `indices_char` map character <-> index, `x` is a boolean array of
        shape `(n_windows, maxlen, len(chars))` with the one-hot windows,
        `y` is a boolean array of shape `(n_windows, len(chars))` with the
        one-hot next characters, and `text` is the joined text.
    """
    text = ' '.join(itertools.chain.from_iterable(lyrics))
    chars = sorted(set(text))
    print('total chars:', len(chars))
    char_indices = {c: i for i, c in enumerate(chars)}
    indices_char = {i: c for i, c in enumerate(chars)}

    # cut the text in semi-redundant sequences of maxlen characters
    sentences = []
    next_chars = []
    for i in range(0, len(text) - maxlen, step):
        sentences.append(text[i: i + maxlen])
        next_chars.append(text[i + maxlen])
    print('nb sequences:', len(sentences))

    print('Vectorization...')
    # Use the builtin `bool`: the `np.bool` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
    y = np.zeros((len(sentences), len(chars)), dtype=bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            x[i, t, char_indices[char]] = 1
        y[i, char_indices[next_chars[i]]] = 1

    return chars, char_indices, indices_char, x, y, text


We'll only use the lyrics from *Tranquility Base Hotel & Casino* (TBH&C).

In [17]:
# Window length (in characters) and stride between consecutive windows
# passed to format_lyrics.
maxlen = 50
step = 3

# Keep only the token lists of the songs on the selected album.
tbhc_lyrics = lyrics_df.loc[
    lyrics_df['album'] == 'Tranquility Base Hotel & Casino', 'lyrics'
].values

chars, char_indices, indices_char, x, y, text = format_lyrics(tbhc_lyrics, maxlen, step)

total chars: 57
nb sequences: 4297
Vectorization...


Now we define the model, along with some helper functions to visualize the training process, based on [this tutorial](https://keras.io/examples/lstm_text_generation/).

In [18]:
print('Build model...')
# Three stacked LSTM layers (the first two return full sequences so the next
# LSTM can consume them), with dropout after the first two, followed by a
# softmax over the character vocabulary.
model = Sequential([
    LSTM(256, input_shape=(maxlen, len(chars)), return_sequences=True),
    Dropout(0.2),
    LSTM(256, return_sequences=True),
    Dropout(0.2),
    LSTM(128),
    Dense(len(chars), activation='softmax'),
])

optimizer = RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

def sample(preds, temperature=1.0):
    """Draw one index at random from the probability array `preds`.

    The log-probabilities are divided by `temperature` and re-normalised
    before drawing, so values below 1 sharpen the distribution and values
    above 1 flatten it.

    Returns:
        The index selected by a single multinomial draw.
    """
    log_probs = np.log(np.asarray(preds).astype('float64'))
    weights = np.exp(log_probs / temperature)
    distribution = weights / np.sum(weights)
    # One trial, one experiment: the result is a one-hot row whose argmax is
    # the sampled index.
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)


def on_epoch_end(epoch, _):
    """Print text generated by the model after every 15th epoch.

    A blank line is printed after every epoch. When `epoch + 1` is a
    multiple of 15, a random `maxlen`-character seed is taken from `text`
    and, for each diversity (sampling temperature) value, 400 characters
    are generated one at a time and streamed to stdout.

    Args:
        epoch: epoch index passed in by the callback.
        _: second callback argument; unused.
    """
    print()
    if (epoch + 1) % 15:
        return
    print('----- Generating text after Epoch: %d' % epoch)

    # The same seed is reused for every diversity value.
    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Sliding window of the last `maxlen` characters fed to the model.
        window = seed
        for _step in range(400):
            # One-hot encode the current window as a batch of one.
            encoded = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                encoded[0, pos, char_indices[ch]] = 1.

            probs = model.predict(encoded, verbose=0)[0]
            picked = indices_char[sample(probs, diversity)]

            # Drop the oldest character and append the new one.
            window = window[1:] + picked

            sys.stdout.write(picked)
            sys.stdout.flush()
        print()

# Wrap on_epoch_end in a Keras callback so it can be passed to model.fit.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

Build model...


In [19]:
# Train on the one-hot windows (x) and next-character targets (y);
# print_callback prints generated samples after every 15th epoch.
model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

Epoch 1/60

Epoch 2/60

Epoch 3/60

Epoch 4/60

Epoch 5/60

Epoch 6/60

Epoch 7/60

Epoch 8/60

Epoch 9/60

Epoch 10/60

Epoch 11/60

Epoch 12/60

Epoch 13/60

Epoch 14/60

Epoch 15/60

----- Generating text after Epoch: 14
----- diversity: 0.2
----- Generating with seed: " a singer must die Singsong Round the Money Tree T"
 a singer must die Singsong Round the Money Tree The louk the louk the ste ste pore the ste pe the woth the been the male the woth a lou the stor the the be in the woth the wath the pont on the wole fo the that a lou the lole fon a lou ghat a lou that the ste whon the ball the bole fou what a lou the wole fon the louk the bot the woth the the ste pand the the pont the the loth the u lou the pond the pon the wose that the goun the loth that a liut
----- diversity: 0.5
----- Generating with seed: " a singer must die Singsong Round the Money Tree T"
 a singer must die Singsong Round the Money Tree Thit the puth ant thit a lout on ant fot awt tho nath ons a pout the lot


Epoch 44/60

Epoch 45/60

----- Generating text after Epoch: 44
----- diversity: 0.2
----- Generating with seed: "n bars Waffling on to strangers all about martial "
n bars Waffling on to strangers all about martial a wan looking on port fleaned Ill be me in the beterner Pand speaking Ple It so nall the beterning Sle mened love You kust the beon Bler to your call Inst to mity you uvler She looks like fun She looks like fun She looking on poll to the best Bant so you with u wat a will four anterner to mitio I dove me in the beod Pance Singor Owarder Ole were Time to mititill I jupt anmot all got all got all on
----- diversity: 0.5
----- Generating with seed: "n bars Waffling on to strangers all about martial "
n bars Waffling on to strangers all about martial a night four stars tike Lete Bint time the bent exout youv your call Bint in me ece a got a list the meon The bent so y ur fiction does Yoe my make A cante to mity you on the memery for a little with youd too trr that tre beon The

<keras.callbacks.callbacks.History at 0x7fd68c2de780>