In [1]:
import importlib
import numpy as np
import pandas as pd
import utils

In [2]:
# Reload utils so that edits made to utils.py are picked up without restarting the kernel.
importlib.reload(utils)

<module 'utils' from '/home/fei/Documents/projects/lyrics/stacked_lstm_statefull/utils.py'>

In [3]:
from keras.models import Sequential, load_model
from keras.layers import Embedding, LSTM, Dense, Dropout, Flatten, Conv1D, MaxPooling1D, BatchNormalization, Bidirectional
from keras.preprocessing.sequence import pad_sequences
# from keras.optimizers import Adam

Using TensorFlow backend.


In [4]:
# Load the two lookup tables used for encoding and decoding text. The helper lives in
# utils.py, which is not shown here (presumably token -> index and index -> token).
word2ind, ind2word = utils.load_index_word_map()

In [5]:
# The number of entries in word2ind sizes both the embedding input and the softmax output.
vocab_size = len(word2ind)

In [6]:
# Read the table with its first column as the index and no header row, so the remaining
# columns are labelled by integer position (column 1 is read later as the batch size).
batchs = pd.read_csv('data/batch_sizes.csv', index_col=0, header=None)

In [7]:
# Draw one entry at random from the index of the batch-size table; the drawn value
# selects which training file is loaded and which batch size is looked up.
available_lengths = batchs.index.tolist()
lyrics_len = np.random.choice(available_lengths, size=1)[0]
lyrics_len

3128

In [8]:
# Load the training table for the sampled length; the file name embeds lyrics_len.
dataset = pd.read_csv('data/train_{}.csv'.format(lyrics_len))

In [9]:
# Preview the first rows of the training table.
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,291,292,293,294,295,296,297,298,299,300
0,48,59,50,74,8,75,49,1,1,0,...,44,54,1,55,61,74,1,57,58,56
1,59,50,74,8,75,49,1,1,0,39,...,54,1,55,61,74,1,57,58,56,57
2,50,74,8,75,49,1,1,0,39,54,...,1,55,61,74,1,57,58,56,57,1
3,74,8,75,49,1,1,0,39,54,62,...,55,61,74,1,57,58,56,57,1,69
4,8,75,49,1,1,0,39,54,62,58,...,61,74,1,57,58,56,57,1,69,64


In [10]:
# (rows, columns) of the training table; all but the last column are fed to the model as input.
dataset.shape

(2828, 301)

In [11]:
# Hyper-parameters shared by the training and the prediction model.
# emb_dim: size of each embedding vector.
emb_dim = 128
# input_length: number of tokens in every input sequence.
input_length = 300
# Batch size stored for this lyrics length (column 1 of the batch-size table).
batch_size = batchs.loc[lyrics_len, 1]

In [12]:
# Show the batch size that was looked up for the sampled lyrics length.
batch_size

1

In [13]:
# Training network: embedding -> two Conv1D/MaxPooling1D stages -> two stateful
# bidirectional LSTMs (each followed by batch normalisation) -> softmax over the vocabulary.
model = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=emb_dim,
        input_length=input_length,
        # Stateful recurrent layers need a fixed batch size, hence the full batch shape.
        batch_input_shape=(batch_size, input_length),
    ),
    # First convolutional stage; pooling halves the sequence length.
    Conv1D(filters=256, kernel_size=4, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2, padding='same'),
    Dropout(.2),
    # Second convolutional stage; pooling halves the sequence length again.
    Conv1D(filters=32, kernel_size=4, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2, padding='same'),
    # The first recurrent layer returns the whole sequence so that it can feed the second.
    Bidirectional(LSTM(100, return_sequences=True, dropout=.2, recurrent_dropout=.2, stateful=True)),
    BatchNormalization(),
    Bidirectional(LSTM(100, dropout=.2, recurrent_dropout=.2, stateful=True)),
    BatchNormalization(),
    Dropout(.2),
    # One probability per vocabulary entry.
    Dense(vocab_size, activation='softmax'),
])

In [14]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, 300, 128)             9728      
_________________________________________________________________
conv1d_1 (Conv1D)            (1, 300, 256)             131328    
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (1, 150, 256)             0         
_________________________________________________________________
dropout_1 (Dropout)          (1, 150, 256)             0         
_________________________________________________________________
conv1d_2 (Conv1D)            (1, 150, 32)              32800     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (1, 75, 32)               0         
_________________________________________________________________
bidirectional_1 (Bidirection (1, 75, 200)              106400    
__________

In [15]:
# Continue from an earlier run: copy the weights of the saved model into the freshly
# built network.
# NOTE(review): this requires model_300.h5 to exist already, and the only cell that
# writes it comes later in this notebook. Confirm how the first checkpoint is produced.
model.set_weights(load_model('model_300.h5').get_weights())

In [16]:
# The targets passed to fit are integer ids rather than one-hot vectors, which is the
# input format the sparse categorical cross-entropy loss expects.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [17]:
# Train for at most five single-epoch rounds and stop early as soon as the training
# accuracy of a round exceeds 0.55.
for round_idx in range(5):
    print(round_idx)
    # Every round starts from a clean recurrent state.
    model.reset_states()
    model_hist = model.fit(
        # Every column but the last holds the input tokens.
        dataset.iloc[:, :-1].values,
        # The last column holds the token to predict.
        dataset.iloc[:, [-1]].values,
        epochs=1,
        batch_size=batch_size,
        # The LSTM layers are stateful, so samples must be fed in their stored order;
        # the default shuffle=True would carry state across unrelated samples.
        shuffle=False,
    )
    # Older Keras releases report this metric as 'acc', newer ones as 'accuracy'.
    history = model_hist.history
    train_acc = history['acc'][0] if 'acc' in history else history['accuracy'][0]
    if train_acc > .55:
        break

0
Epoch 1/1
1
Epoch 1/1
2
Epoch 1/1
3
Epoch 1/1
4
Epoch 1/1


In [18]:
# Save the whole model to the same file the weights were loaded from earlier in this
# notebook, so that a later run picks up from this state.
model.save('model_300.h5')

In [19]:
# Inference copy of the network with a fixed batch size of one. It has the same
# weight-bearing layers as the training model but leaves out the Dropout layers.
pred_model = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=emb_dim,
        input_length=input_length,
        # One sequence per prediction call.
        batch_input_shape=(1, input_length),
    ),
    Conv1D(filters=256, kernel_size=4, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2, padding='same'),
    Conv1D(filters=32, kernel_size=4, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2, padding='same'),
    # The first recurrent layer returns the whole sequence so that it can feed the second.
    Bidirectional(LSTM(100, return_sequences=True, dropout=.2, recurrent_dropout=.2, stateful=True)),
    BatchNormalization(),
    Bidirectional(LSTM(100, dropout=.2, recurrent_dropout=.2, stateful=True)),
    BatchNormalization(),
    # One probability per vocabulary entry.
    Dense(vocab_size, activation='softmax'),
])

In [20]:
# Dropout layers hold no weights, so the weight lists of the two models line up even
# though pred_model leaves those layers out.
pred_model.set_weights(model.get_weights())

In [21]:
def implement(seed_text, maxlen=100, must_stop=2000, n_likely=5):
    """Generate text one token at a time, starting from ``seed_text``.

    The seed is split into single characters, encoded with ``utils.tokenise`` and
    extended by repeatedly sampling the next token from ``pred_model``. The
    function relies on the notebook-level names ``pred_model``, ``word2ind`` and
    ``ind2word``.

    Parameters
    ----------
    seed_text : str
        Text the generation starts from; it is kept at the start of the result.
    maxlen : int
        Length of the window handed to the model. Shorter contexts are padded
        and longer ones are cut at the front by ``pad_sequences``.
        NOTE(review): the prediction model in this notebook is built for
        sequences of 300 tokens, so the default of 100 does not match it; pass
        the model's input length explicitly.
    must_stop : int
        Number of tokens to generate.
    n_likely : int
        Sample only among this many most probable tokens at each step.

    Returns
    -------
    str
        The seed followed by the generated continuation.

    Raises
    ------
    ValueError
        If ``n_likely`` is smaller than 1.
    """
    # A non-positive value would turn the [-n_likely:] slice below into "everything"
    # (or an arbitrary tail), silently sampling from the wrong candidate set.
    if n_likely < 1:
        raise ValueError('n_likely must be at least 1, got {}'.format(n_likely))

    res_tokens = list(utils.tokenise(word2ind, list(seed_text)))

    # Start from a clean recurrent state; it is then carried from step to step.
    pred_model.reset_states()
    for _step in range(must_stop):
        # pad_sequences keeps the last `maxlen` tokens and left-pads shorter contexts,
        # so padding the full history yields the sliding window directly.
        window = pad_sequences([res_tokens], maxlen=maxlen)
        probs = pred_model.predict(window, batch_size=1)[0]

        # Indices of the n_likely most probable tokens (argsort is ascending).
        top_idx = np.argsort(probs)[-n_likely:]
        # Renormalise in float64 so that the probabilities sum to one tightly enough
        # for np.random.choice, whatever precision the model outputs.
        top_p = probs[top_idx].astype(np.float64)
        top_p /= top_p.sum()

        res_tokens.append(int(np.random.choice(top_idx, p=top_p)))

    return ''.join(utils.detokenise(ind2word, res_tokens))

In [22]:
# Generate 800 tokens from the seed, using a 300-token window (the model's input
# length) and sampling among the 5 most probable tokens at each step.
res = implement('We are friends, good friends', 300, 800, 5)

In [23]:
# print is used so that line breaks in the generated text are rendered as such.
print(res)

We are friends, good friends
Fux[Fre
]

q(exque...GaKquy)

(end)))

(end)
(Free)

(end)))

(end)
(Free)

(end)
(Free

(end)
(GeK

que.

(end)))

(end)))

(end)
(Free)

(end)
(Free)

(end)])

(end)
(Free)

(end)
(Free.

(enK]
(
(GeKque'FneKque.......G]....

(end)))

(end)
(Free.

(end)
((FreeKque'HL])
(:KKque'Fne!

(enx-juxK

(enx)
(Ge?

(end)
(GetKy)

(enxFy)

(end)))

(end)
(GeK]

(enxFy)

(enxFexK]

(end)))

(end)
(Feelque's jux)

(enK]

(end)))

(end)
(
FuxGixGe2Kque'Kque..

(enK])

(enx)
(GeK]

(enxD])

(enx)))

(end)
(
Goqued

(end)
(
Goque.

(end)
(GeKque'FneKque........GoKKKKKKFKque!...

(end)
(GeK]

(end)))

(end)
(
FuxGe-2'FLe.)

(enxFexFy)

(end)
(GeK

(enxFexFy)]

(enxFexK]

(end)))

(end)
(GeK

(enxLoK]

(end)
((Free 2]

que!

(end)
(FelxK]

(enx-juxK

(enx:])

(end)))

(end)
(GetKy

(enx)
(Ge?

(enx)
(
(
