<span style="font-size: 2em; font-weight:bold">AI 70's Country</span>

In [1]:
import warnings
warnings.filterwarnings('ignore')

import scipy
import numpy as np
import matplotlib
import pandas as pd
import statsmodels
import sklearn
import tensorflow
#import keras

from tensorflow.python.keras.models import Sequential, load_model
from tensorflow.python.keras.layers import Dense,LSTM,Dropout
from tensorflow.python.keras.utils import to_categorical
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from tensorflow.python.keras.callbacks import History, EarlyStopping, ModelCheckpoint
from tensorflow.python.keras.constraints import maxnorm
import string


import json
import time

As always, data prep is the hardest part of the project.  Because I am going to use a validation set in training my model, and because Keras uses the last n% of the data as the validation set, I want to shuffle the lyrics so that my validation set contains a better representation of all the data - not just the last song.  I also want to get the most originality that I can out of the model, so I will eliminate duplicate lyrics (some songs have refrains that repeat multiple times).   sorting has benefit of shuffling also. 

In [2]:
lyricsCSV = pd.read_csv('lyricsTrain_35.csv',encoding='ISO-8859-1')
lyricsCSV.sort_values(lyricsCSV.columns[0],inplace=True)
lyricsCSV.drop_duplicates(keep='first',inplace=True)
lyricsCSV = lyricsCSV.sample(frac=1)



lyricsCSV.to_csv('lyrics.txt',sep='\t',index=False)
l = open('lyrics.txt','r')
lyrics = l.read()
l.close()

Remove lines and print

In [3]:
tokens = lyrics.split()
lyrics = ' '.join(tokens)
print(lyrics)

lyrics And then one winter day His mama named him Tommy, but folks just called him yellow Then a man of low degree stood by her side So you better think it over With cigarettes and songs Except I can't sleep Even with someone they love My only prayer will be that some day you'll care for me but it's only make believe Nice to see you But I could never love again And combed my hair Sunday morning coming down Just may, just give me a call-you know where I am """When you hot, you hot""" Oh there goes my everything With a laughing little girl And it's good when I finally make it home, all alone Bad so I had one more for dessert And found my cleanest dirty shirt I didn't mean to treat you bad Give me no reasons, give me alibies But when he lo-oves me, he really lo-oves me Then the flame became a dying ember But if you ever want somebody to just love ya, and some day you Well, I guess that's about all I gotta say. The rain can fall so soft against the window I know those bright lights are cal

One final thing, when I look at the above lyrics, I seem to see a LOT of quotation marks.  So, I am going to just replace those with a space.  

In [4]:
lyrics = lyrics.replace('"',' ')

Now, we can build sequences of characters that will be used to predict a final character

In [5]:
length = 100 # Length of the characer sequences (because we have so much verbage,
             # we can use a relatively large number)
sequences = list()
for i in range(length, len(lyrics)):
    seq = lyrics[i-length:i+1]
    sequences.append(seq)

Create and save a .txt file of our sequences with line endings

In [6]:
data = '\n'.join(sequences)
file = open('char_sequences.txt','w')
file.write(data)
file.close()

Create a dictionary of character:number mappings 

In [7]:
file = open('char_sequences.txt','r')
raw_text = file.read()
file.close()

lines = raw_text.split('\n')

chars = sorted(list(set(raw_text)))
mapping = dict((c, i) for i, c in enumerate(chars))

# Save the mapping as json 
json_map = json.dumps(mapping)
__ = open('mapping.json','w')
__.write(json_map)
__.close()

Use the dictionary to create sequences of numbers only (numbers that describe the characters)

In [8]:
sequences = list()
for line in lines:
    encoded_seq = [mapping[char] for char in line]
    sequences.append(encoded_seq)
    


Create input sets (with 99 characters) and output sets (1 character) and then one-hot code the sets so we can use them to train the model.

In [9]:
vocab_size = len(mapping)
sequences = np.array(sequences)
X, y = sequences[:,:-1], sequences[:,-1]
sequences = [to_categorical(x, num_classes=vocab_size) for x in X] #one-hot code input
X = np.array(sequences)
y = to_categorical(y, num_classes=vocab_size) #one-hot code output

Fit the model with tuning parameters determined by trial and error.

In [10]:
units = 50  # From the mentioned article in data science
epochs = 1000  # Just a large number since I am using early stopping
validationSplit = 0.2 # My data set is small so I want to use as much as possible to train vs. validate
shuffle = True
batchSize = 64 # Doubled the default batch size to speed up training
dropOut = .2  # http://papers.nips.cc/paper/4878-understanding-dropout.pdf
    #Define callbacks

es = EarlyStopping(monitor = 'val_loss',min_delta = .01, patience = 5, mode = 'min',verbose=1)
mc = ModelCheckpoint('model.3LSTM.best', monitor='acc', mode='max', save_best_only=True) # Keep best model


    # define and fit model
model = Sequential()
model.add(LSTM(units, return_sequences=True,dropout = dropOut,input_shape=(X.shape[1], X.shape[2])))
model.add(LSTM(units, return_sequences=True,dropout = dropOut,input_shape=(X.shape[1], X.shape[2])))
model.add(LSTM(units, dropout = dropOut,input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(dropOut))
model.add(Dense(vocab_size, activation='softmax',kernel_constraint=maxnorm(3)))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


modelLyrics = model.fit(X, y, epochs = epochs, validation_split = validationSplit, 
                            shuffle = shuffle, batch_size = batchSize, verbose=1,callbacks=[es, mc])

   
history = pd.DataFrame(modelLyrics.history)
history.to_csv('modelLyricsHistory.3LSTM.csv',index=False)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Train on 23347 samples, validate on 5837 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000


Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 00066: early stopping


Create a function that encodes a kickoff text string and then plugs it into our trained model

# generate a sequence of characters with a language model
def generate_seq(model, mapping, seq_length, seed_lyric, n_chars):
    lyrics = seed_lyric
    for __ in range(n_chars):
    # encode the characters as integers
        encoded = [mapping[char] for char in lyrics]
    # truncate sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
    # one hot encode
        encoded = to_categorical(encoded, num_classes=len(mapping))
    # predict character
        yhat = model.predict_classes(encoded, verbose=0)
    # reverse map integer to character
        out_char = ''
        for char, index in mapping.items():
            if index == yhat:
                out_char = char
                break
    # append to input
        lyrics += char
    return lyrics

Is there a song other than Stairway to Heaven that I could have used for the kickoff sequence?

startLyrics = "I've had a largemouth bass bust my line A couple beautiful girls tell me, Goodbye Trucks break down,"

startLyrics = "There's a lady who's sure All that glitters is gold And she's buying a stairway to heaven When she g"

__ = open('lyrics.Coolio.Epoch100.txt','w')

model = load_model('model.100Epoch_6.6.best')
lyricsFinal = generate_seq(model,mapping,length,startLyrics,1000)
__ = open('lyrics.Coolio.Epoch100.txt','a+')
#__.write('Drop {}\n\n'.format(modelNum))
__.write(lyricsFinal)
__.write('.\n\n\n')
__.close()

Run the model and print the lyrics

def run_models(modelNum):
    model = load_model('model.drop_0.{}.best'.format(modelNum))
    lyricsFinal = generate_seq(model,mapping,length,startLyrics,1000)
    __ = open('lyrics.LedZep.txt','a+')
    __.write('Drop {}\n\n'.format(modelNum))
    __.write(lyricsFinal)
    __.write('.\n\n\n')
    __.close()
    
    

__ = open('lyrics.LedZep.txt','w')
__.write('Stairway to Heaven/n/n')
__.close()

for i in range(0,6):
    run_models(str(i))

df = pd.read_json('modelLyricsHistory.json')

df