In [1]:
import importlib
import numpy as np
import pandas as pd
import utils

In [2]:
# Re-import utils so that edits made to utils.py after the first import
# take effect in the running kernel without a restart.
importlib.reload(utils)

<module 'utils' from '/home/fei/Documents/projects/lyrics/stacked_lstm_statefull/utils.py'>

In [3]:
from keras.models import Sequential, load_model
from keras.layers import Embedding, LSTM, Dense, Dropout, Flatten, Conv1D, MaxPooling1D, BatchNormalization, Bidirectional
from keras.preprocessing.sequence import pad_sequences
# from keras.optimizers import Adam

Using TensorFlow backend.


In [4]:
# Lookup tables used further down: word2ind is passed to utils.tokenise and
# ind2word to utils.detokenise when generating text.
word2ind, ind2word = utils.load_index_word_map()

In [5]:
# Number of entries in word2ind; used as the Embedding input_dim and as the
# width of the final softmax layer of both models.
vocab_size = len(word2ind)

In [6]:
# Header-less CSV whose first column becomes the index. The index values are
# sampled below as the lyrics length, and column 1 is read as the batch size
# to use for that length.
batchs = pd.read_csv('data/batch_sizes.csv', index_col=0, header=None)

In [7]:
# Draw one lyrics length at random from the index of the batch-size table.
# No random seed is set in the cells above, so the pick (and therefore the
# training file loaded next) can differ from run to run.
length_choices = batchs.index.tolist()
lyrics_len = np.random.choice(length_choices, size=1)[0]
lyrics_len

1163

In [8]:
# Training table for the lyrics length drawn above.
train_path = 'data/train_{}.csv'.format(lyrics_len)
dataset = pd.read_csv(train_path)

In [9]:
# Preview the first rows of the training table.
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,291,292,293,294,295,296,297,298,299,300
0,36,63,54,1,56,64,61,53,54,63,...,54,1,1,0,30,63,1,69,57,54
1,48,43,54,67,68,54,1,11,49,1,...,71,54,1,63,64,51,64,53,74,1
2,23,54,69,69,54,67,1,63,64,69,...,72,58,63,53,64,72,1,1,0,41
3,30,1,72,58,61,61,1,63,64,69,...,54,67,1,50,63,53,1,30,1,59
4,30,63,1,54,52,57,64,54,53,1,...,63,72,50,67,53,68,1,72,58,69


In [10]:
# (rows, columns). The training cell uses the last column as the target and
# all preceding columns as the model input.
dataset.shape

(43150, 301)

In [11]:
# Width of the embedding vectors (Embedding output_dim).
emb_dim = 128
# Number of tokens per model input; both models are built with this fixed length.
input_length = 300
# Batch size registered for the sampled lyrics length (column 1 of the
# batch-size table). The training model is built with this fixed batch size.
batch_size = batchs.loc[lyrics_len, 1]

In [12]:
# Show the batch size selected for this run.
batch_size

50

In [13]:
# Training network, built with a fixed batch size because the LSTM layers are
# stateful: embedding -> two Conv1D/MaxPooling1D stages (each pooling with
# pool_size=2) -> two stateful bidirectional LSTMs with batch normalisation
# -> softmax over the vocabulary.
model = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=emb_dim,
        input_length=input_length,
        batch_input_shape=(batch_size, input_length),
    ),
    Conv1D(filters=256, kernel_size=4, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2, padding='same'),
    Dropout(0.2),
    Conv1D(filters=32, kernel_size=4, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2, padding='same'),
    Bidirectional(
        LSTM(100, return_sequences=True, dropout=0.2, recurrent_dropout=0.2, stateful=True)
    ),
    BatchNormalization(),
    Bidirectional(
        LSTM(100, dropout=0.2, recurrent_dropout=0.2, stateful=True)
    ),
    BatchNormalization(),
    Dropout(0.2),
    Dense(vocab_size, activation='softmax'),
])

In [14]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (50, 300, 128)            9728      
_________________________________________________________________
conv1d_1 (Conv1D)            (50, 300, 256)            131328    
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (50, 150, 256)            0         
_________________________________________________________________
dropout_1 (Dropout)          (50, 150, 256)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (50, 150, 32)             32800     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (50, 75, 32)              0         
_________________________________________________________________
bidirectional_1 (Bidirection (50, 75, 200)             106400    
__________

In [15]:
# Resume from the previous run: load the model stored in model_300.h5 and copy
# only its weights into the freshly built network. The file is the one written
# by the save cell after training, so it must already exist for this cell to
# run. compile() is called afterwards with a new 'adam' optimizer, so only the
# layer weights are carried over here.
model.set_weights(load_model('model_300.h5').get_weights())

In [16]:
# The sparse variant of categorical cross-entropy takes the target as a token
# index (the last dataset column) rather than a one-hot vector.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [17]:
# Inputs are every column but the last; the last column is the target token.
# They do not change between epochs, so extract them once.
train_inputs = dataset.iloc[:, :-1].values
train_targets = dataset.iloc[:, [-1]].values

# `epoch` rather than `_`, which would overwrite IPython's last-output variable.
for epoch in range(5):
    print(epoch)
    # Start every pass over the data from a clean LSTM state.
    model.reset_states()
    # The LSTM layers are stateful: the state reached at sample i of one batch
    # is the initial state for sample i of the next batch. fit() shuffles rows
    # by default, which would break that correspondence, so keep file order.
    model.fit(
        train_inputs,
        train_targets,
        epochs=1,
        batch_size=batch_size,
        shuffle=False,
    )

0
Epoch 1/1
1
Epoch 1/1
2
Epoch 1/1
3
Epoch 1/1
4
Epoch 1/1


In [18]:
# Save the trained model, overwriting the checkpoint file that the weights
# were restored from before training.
model.save('model_300.h5')

In [19]:
# Inference copy of the training network, built for a batch size of 1 so that
# a single sequence can be fed at a time. The weight-carrying layers are the
# same as in `model` and in the same order; the standalone Dropout layers are
# left out.
pred_model = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=emb_dim,
        input_length=input_length,
        batch_input_shape=(1, input_length),
    ),
    Conv1D(filters=256, kernel_size=4, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2, padding='same'),
    Conv1D(filters=32, kernel_size=4, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2, padding='same'),
    Bidirectional(
        LSTM(100, return_sequences=True, dropout=0.2, recurrent_dropout=0.2, stateful=True)
    ),
    BatchNormalization(),
    Bidirectional(
        LSTM(100, dropout=0.2, recurrent_dropout=0.2, stateful=True)
    ),
    BatchNormalization(),
    Dense(vocab_size, activation='softmax'),
])

In [20]:
# Copy the trained weights into the batch-size-1 model. The two networks differ
# only in batch size and in the standalone Dropout layers, which hold no
# weights, so the weight lists line up.
pred_model.set_weights(model.get_weights())
# Clear the stateful LSTM state before generating text.
pred_model.reset_states()

In [21]:
def implement(seed_text, maxlen=100, must_stop=2000, n_likely=5):
    """Generate text with ``pred_model``, starting from ``seed_text``.

    The seed is split into characters and tokenised with ``utils.tokenise``.
    Then, ``must_stop`` times, the most recent ``maxlen`` tokens are fed to
    ``pred_model`` and the next token is sampled from the ``n_likely`` most
    probable candidates, whose probabilities are renormalised to sum to 1.

    Parameters
    ----------
    seed_text : str
        Text to start from; its tokens are kept at the start of the result.
    maxlen : int
        Window length handed to ``pad_sequences``. ``pred_model`` is built
        with a fixed input length (``input_length``), so this presumably has
        to equal that value; the call in this notebook passes 300.
    must_stop : int
        Number of tokens to generate.
    n_likely : int
        How many of the highest-probability tokens to sample from at each step.

    Returns
    -------
    str
        Seed plus generated tokens, mapped back through ``utils.detokenise``
        and joined into one string.
    """
    # pred_model's LSTM layers are stateful, so state left behind by earlier
    # predict() calls would otherwise leak into this generation and make the
    # result depend on whatever was generated before.
    pred_model.reset_states()

    res_tokens = list(utils.tokenise(word2ind, list(seed_text)))

    remaining = must_stop
    while remaining > 0:
        # pad_sequences left-pads short input with zeros and keeps only the
        # last `maxlen` tokens of longer input, so padding the full running
        # token list always yields the current model window.
        window = pad_sequences([res_tokens], maxlen=maxlen)
        probs = pred_model.predict(window, batch_size=1)[0]

        # Restrict sampling to the n_likely most probable tokens.
        top_tokens = np.argsort(probs)[-n_likely:]
        top_probs = probs[top_tokens] / np.sum(probs[top_tokens])

        res_tokens.append(np.random.choice(top_tokens, p=top_probs))
        remaining -= 1

    return ''.join(utils.detokenise(ind2word, res_tokens))

In [22]:
# Generate 800 tokens from the seed, using a 300-token input window (the
# input length pred_model was built with) and sampling among the 5 most
# probable tokens at each step.
res = implement('We are friends, good friends', 300, 800, 5)

In [23]:
# print() rather than a bare expression so that line breaks in the generated
# text are rendered instead of shown as escape sequences.
print(res)

We are friends, good friends  
You get a some have  
  
You got to thafe an the hoed  
Loves the here  
Aneet thange's high  
I get my heart ous

(end)  
Take there an hers  
I were higher my hoar tiget  
You gaen all the thing  
I leeve hears tire to tergess  
Til the make it the on the only minde brosing  
The call my life that the groth in it  
And you were alonking mouses  
All the same of the let'd  
It this hurned all the shirt of the anofw  
But the the tomelter we loven  
As man to the see the take a that  
The anogether the star and my tatire  
The the when me away on home  
But the same a ang some our shill  
Aile it spititing only you  
And the mad at lesers find  
As to the prodything with some stonnit  
I left me the surting one of you  
I do the mart on all out move you better songs  
I've how together 
