In [0]:
import numpy as np
import pandas as pd
import sys 
from keras.models import Sequential
from keras.layers import LSTM, Activation, Flatten, Dropout, Dense, Embedding, TimeDistributed, CuDNNLSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.callbacks import LambdaCallback
from keras.optimizers import RMSprop, Adam
import random
import sys
import io

In [70]:
# Load the line-per-row lyrics table; the CSV is latin1-encoded.
lyrics_csv = 'taylor_swift_lyrics.csv'
dataset = pd.read_csv(lyrics_csv, encoding="latin1")
dataset.head()

Unnamed: 0,artist,album,track_title,track_n,lyric,line,year
0,Taylor Swift,Taylor Swift,Tim McGraw,1,He said the way my blue eyes shined,1,2006
1,Taylor Swift,Taylor Swift,Tim McGraw,1,Put those Georgia stars to shame that night,2,2006
2,Taylor Swift,Taylor Swift,Tim McGraw,1,"I said, ""That's a lie""",3,2006
3,Taylor Swift,Taylor Swift,Tim McGraw,1,Just a boy in a Chevy truck,4,2006
4,Taylor Swift,Taylor Swift,Tim McGraw,1,That had a tendency of gettin' stuck,5,2006


In [0]:
def processFirstLine(lyrics, songID, songName, row):
    """Open a new song entry from the first lyric row of that song.

    The three lists are extended in place (one new element each) and also
    returned, so callers may either rely on the mutation or rebind the result.

    Args:
        lyrics: list of per-song lyric strings; gains ``row['lyric'] + '\\n'``.
        songID: list of numeric song ids; gains ``year * 100 + track_n``.
        songName: list of song titles; gains ``row['track_title']``.
        row: mapping with 'lyric', 'year', 'track_n' and 'track_title' keys.

    Returns:
        The tuple ``(lyrics, songID, songName)``.
    """
    lyrics += [row['lyric'] + '\n']
    songID += [row['year'] * 100 + row['track_n']]
    songName += [row['track_title']]
    return lyrics, songID, songName

# Collapse the one-row-per-lyric-line table into one entry per song:
#   lyrics[k]   - all lines of song k joined with '\n'
#   songID[k]   - year * 100 + track_n (built by processFirstLine)
#   songName[k] - track title
lyrics = []
songID = []
songName = []

# Key of the song currently being accumulated. Starting from None (rather than
# assuming the first row is track 1) and appending to lyrics[-1] (rather than a
# separately maintained index) keeps the three lists in sync for any row order.
# Including the album in the key also separates two adjacent songs that happen
# to share a track number across an album boundary.
# NOTE(review): assumes 'album' is populated on every row - confirm.
current_song = None

for _, row in dataset.iterrows():
    song_key = (row['album'], row['track_n'])
    if song_key != current_song:
        # First line of a new song: open a fresh entry in all three lists.
        lyrics, songID, songName = processFirstLine(lyrics, songID, songName, row)
        current_song = song_key
    else:
        # Continuation line: extend the most recently opened song.
        lyrics[-1] += row['lyric'] + '\n'

In [72]:
# One row per song: id, title and the full newline-joined lyrics.
song_columns = {'songID': songID, 'songName': songName, 'lyrics': lyrics}
lyrics_data = pd.DataFrame(song_columns)
lyrics_data.head()

Unnamed: 0,songID,songName,lyrics
0,200601,Tim McGraw,He said the way my blue eyes shined\nPut those...
1,200602,Picture To Burn,"State the obvious, I didn't get my perfect fan..."
2,200603,Teardrops On My Guitar,Drew looks at me\nI fake a smile so he won't s...
3,200604,A Place In This World,"I don't know what I want, so don't ask me\nCau..."
4,200605,Cold as You,You have a way of coming easily to me\nAnd whe...


In [0]:
# Concatenate every song into a single corpus string. str.join builds the
# result in one pass instead of re-copying the growing string on each `+=`.
lyricsText = ''.join(lyrics)

# Lower-cased string of all lyrics; this is the text the model is trained on.
raw_text = lyricsText.lower()

In [0]:
# Vocabulary of the corpus and the two lookup tables between characters and
# their integer ids (the id is the character's position in the sorted list).
chars = sorted(set(raw_text))  # unique characters
int_chars = {index: symbol for index, symbol in enumerate(chars)}  # id -> char
chars_int = {symbol: index for index, symbol in enumerate(chars)}  # char -> id

In [75]:
# n_chars: length of the whole corpus; n_vocab: number of unique characters.
n_chars, n_vocab = len(raw_text), len(chars)
print(n_chars, n_vocab, sep='\n')

173604
58


In [76]:
# Cut the corpus into overlapping training windows: each sample is `maxlen`
# consecutive characters, its target is the character that follows, and
# successive windows start `step` characters apart.
maxlen = 40
step = 3

window_starts = range(0, len(raw_text) - maxlen, step)
sentences = [raw_text[start: start + maxlen] for start in window_starts]
next_chars = [raw_text[start + maxlen] for start in window_starts]

print('nb sequences:', len(sentences))

nb sequences: 57855


In [0]:
# One-hot encode the training data:
#   x[i, t, k] is True when character t of window i has vocabulary id k
#   y[i, k]    is True when the character following window i has id k
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, chars_int[char]] = 1
    y[i, chars_int[next_chars[i]]] = 1

In [0]:
# First model: a single 128-unit LSTM over the one-hot character window,
# followed by a softmax over the vocabulary to predict the next character.
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

rmsprop = RMSprop(lr=0.01)
model.compile(optimizer=rmsprop, loss='categorical_crossentropy')

In [0]:
def sample(preds, temperature=1.0):
    """Draw one class index from a probability vector after temperature scaling.

    Each probability is raised to the power ``1 / temperature`` and the vector
    is renormalised, so temperatures below 1 sharpen the distribution toward
    the most likely index and temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of non-negative probabilities (e.g. one softmax
            output row).
        temperature: Positive scaling factor. Must be greater than 0.

    Returns:
        The sampled index, as returned by ``np.argmax`` on a single
        one-hot multinomial draw.
    """
    preds = np.asarray(preds).astype('float64')
    # A probability of exactly 0 maps to -inf here and back to 0 after exp,
    # which is the intended result, so suppress the divide-by-zero warning.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest entry is 0 before exponentiating. Without this, a
    # small temperature makes every exp() underflow to 0 and the normalisation
    # below becomes 0/0 (NaN probabilities). The shift cancels out in the ratio.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [0]:
def on_epoch_end(epoch, logs):
    """Print sample text generated by the current global ``model``.

    Intended as a Keras ``LambdaCallback`` hook. A random ``maxlen``-character
    seed is picked from ``raw_text`` once, then for each diversity
    (temperature) value the model extends that same seed by 400 characters,
    streaming every generated character to stdout as it is produced.

    Args:
        epoch: Zero-based epoch index, used only in the printed header.
        logs: Metrics dict passed by Keras; not used.
    """
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(raw_text) - maxlen - 1)
    seed_text = raw_text[seed_start: seed_start + maxlen]
    vocab_size = len(chars)

    for diversity in (0.2, 0.5):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed_text + '"')
        sys.stdout.write(seed_text)

        # Sliding window holding the most recent `maxlen` characters.
        window = seed_text
        for _ in range(400):
            # One-hot encode the current window as a batch of one.
            x_pred = np.zeros((1, maxlen, vocab_size))
            for position, symbol in enumerate(window):
                x_pred[0, position, chars_int[symbol]] = 1.

            probabilities = model.predict(x_pred, verbose=0)[0]
            predicted_char = int_chars[sample(probabilities, diversity)]

            # Drop the oldest character and append the new one.
            window = window[1:] + predicted_char

            sys.stdout.write(predicted_char)
            sys.stdout.flush()
        print()

In [81]:
# Generate sample text after every epoch while training the first model.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

fit_options = dict(batch_size=128, epochs=10, callbacks=[print_callback])
history = model.fit(x, y, **fit_options)

Epoch 1/10
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "e all the roses
and the note that said
o"
e all the roses
and the note that said
on the been you wante the been i don't you can't be the been i made a don't for the been i me don't be the been you wand the been you some i me were be the been you sall you hand the been you hand the but i me love be the been i me look i shine starling you sand i know i don't know you some it some i made i sore the been i me don't bee some i me pore i don't be the been you some the wante be on the
----- diversity: 0.5
----- Generating with seed: "e all the roses
and the note that said
o"
e all the roses
and the note that said
on the were light ever the the were been i wand care
i mell be lot me gor starst of the ever the reel it in the were i never be to cou don't live i long be thin will i mond of the back i don't know you are and the wast you win thing the been you wand for me plake the never beever you tha

  This is separate from the ipykernel package so we can avoid doing imports until


ne back to me there all ag a fart
and like a perfect say in the right worth the look the mought here and right for you well
the door and round ournds thing down the day here
and you said a love me do

Epoch 9/10
----- Generating text after Epoch: 8
----- diversity: 0.2
----- Generating with seed: " yet? are we out of the woods yet?
are w"
 yet? are we out of the woods yet?
are we out of the woods yet? are we in the clear yet?
do break what you made me do
look what you made me do
look what you made me do
look what you think that it was the way dream
when you're time me out of the rain
why would back to do i don't wanna make you in the morny of the right was the mony
but i know what you say a have of the car
i don't wanna make you can feel the rain
i'm true
and the way dre
----- diversity: 0.5
----- Generating with seed: " yet? are we out of the woods yet?
are w"
 yet? are we out of the woods yet?
are we out of the woods yet? are we were we know that walk we were to be the runns will peo

In [87]:
# 2nd model: two stacked 128-unit LSTMs (the first returns the full sequence
# so the second can consume it), a 100-unit ReLU layer, then a softmax over
# the vocabulary. Rebinding `model` also redirects the text-generation
# callback, which reads the global `model`.
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars)), return_sequences=True),
    LSTM(128),
    Dense(100, activation='relu'),
    Dense(len(chars), activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(x, y, batch_size=128, epochs=10, callbacks=[print_callback])

Epoch 1/10
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: " trains that just aren't coming
we show "
 trains that just aren't coming
we show tou sou the wou tou tou the you wou in in won in tou tou mo s no the tou toe ins that no tou wou sot the tou wou the in the tou sou toe in se tou you the tou so she tou tou wou tou wou in the cou you sou wou wou the tou the the in tou you tou the you wou soe tou woe tou in you the wou sou tou wou wou she in tou in the tou tou wou tou tou tou tou toe ann tou sou the tou sou sou tou tou thann soe wo
----- diversity: 0.5
----- Generating with seed: " trains that just aren't coming
we show "
 trains that just aren't coming
we show you you tate ann s we tas hat sor tou in it and inw woun the in ghe to n wo toed the an tou she se wint you thou at ia wou bo t st n wou sad wou fane lad an go wo i dhe id tou s yee you bite ind lant il te s wat 
an le at an s soe bhe that she you de woe tels ig int yhend init tann an y

In [0]:
# 3rd model: a stack of CuDNNLSTM layers that all return full sequences; the
# sequence output is flattened and fed to a softmax over the vocabulary.
LSTM_layer_num = 4
layer_size = [256] * LSTM_layer_num

model = Sequential()
# Only the first layer declares the input shape.
model.add(CuDNNLSTM(layer_size[0], input_shape=(maxlen, len(chars)), return_sequences=True))
for units in layer_size[1:LSTM_layer_num]:
    model.add(CuDNNLSTM(units, return_sequences=True))

model.add(Flatten())
model.add(Dense(len(chars)))
model.add(Activation('softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [94]:
# Train the 3rd model with a 20% validation split, printing generated text
# after each epoch.
model.fit(
    x,
    y,
    batch_size=128,
    epochs=10,
    verbose=1,
    validation_split=0.2,
    callbacks=[print_callback],
)

Train on 46284 samples, validate on 11571 samples
Epoch 1/10
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "ge that you think i'm funny 'cause he ne"
ge that you think i'm funny 'cause he nee tee soe toa toe i oe toe eo eee you i ee tee eo ee ee he ee hee i the eee teee toe tore toe hoe to ee tea see see i me wee i i tee tia i eet i ah i ee tee i see eet toe soe tos tee eae toe toe tee see tee ee i toe toae a eta in i to ee het i ee ee see i ee tee i see tee tae see i dee tee i me teen you see thane the eee shee
in i ee tei tee tee tae i oe hoe toae you wee toe eoe toe too tee tee ho
----- diversity: 0.5
----- Generating with seed: "ge that you think i'm funny 'cause he ne"
ge that you think i'm funny 'cause he nerse o oit a dind you woe hote oo roe toon you eete hee i wae lowe toe wees soee soe woee
she bese teo tea thete i mot i lme ee eo ke cea
tese it oh ee toe teae lete ahd tee sote hotd be tea
a i at ten if i dea tale see i me shae cie cite

<keras.callbacks.History at 0x7f60e06f9160>