In [1]:
%matplotlib inline
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import utils

In [2]:
from keras.models import Model, load_model
from keras.layers import Input, Embedding, LSTM, Dense, Dropout, Flatten
from keras.models import Model

Using TensorFlow backend.


In [3]:
importlib.reload(utils)

<module 'utils' from '/home/fei/Documents/projects/lyrics/encoder-decoder/utils.py'>

In [4]:
# Load the vocabulary lookup tables from the project-local `utils` module.
# NOTE(review): presumably `word2ind` maps token -> integer index and `ind2word`
# is the inverse mapping; `utils` is not shown in this notebook, so confirm there.
word2ind, ind2word = utils.load_index_word_map()

In [5]:
# Vocabulary size: used as the width of the one-hot model inputs and of the
# softmax output layer.
n_tokens = len(word2ind)
# Number of LSTM units; also the size of the hidden/cell state vectors that the
# encoder hands to the decoder.
latent_dim = 100

In [6]:
# --- Encoder ---------------------------------------------------------------
# Input is a variable-length sequence of one-hot vectors of width `n_tokens`.
encoder_inputs = Input(shape=(None, n_tokens))
# `return_state=True` makes the layer return (output, hidden state, cell state).
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states; they summarize the
# input sequence and seed the decoder.
encoder_states = [state_h, state_c]

# --- Decoder ---------------------------------------------------------------
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, n_tokens))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
# Per-timestep probability distribution over the `n_tokens` vocabulary entries.
decoder_dense = Dense(n_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`.
# `decoder_lstm` and `decoder_dense` are kept as named layer objects because the
# inference models built later in the notebook call them again.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [7]:
# Compile the training model (this cell only compiles; training is started by
# the `model.fit` call further down).
# The sparse variant of categorical cross-entropy takes integer class indices as
# targets, so the decoder targets are fed as indices rather than one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [8]:
# Read the pre-built index matrices from CSV files in the working directory.
# Each is loaded as a 2-D NumPy array (one row per sample, one column per timestep).
encoder_input_data_2d = pd.read_csv('encoder_input.csv').values
decoder_input_data_2d = pd.read_csv('decoder_input.csv').values
decoder_target_data = pd.read_csv('decoder_target.csv').values
# Append a trailing axis: (samples, timesteps) -> (samples, timesteps, 1).
# The targets stay as integer indices; they are not one-hot encoded.
decoder_target_data = decoder_target_data.reshape((decoder_target_data.shape[0], decoder_target_data.shape[1], -1))

In [9]:
def train_valid_split(length, test_size=0.3, random_state=43):
    """Split the indices ``0 .. length-1`` into training and validation lists.

    The validation indices are drawn without replacement from a private
    ``np.random.RandomState`` seeded with ``random_state``, so the same
    arguments always produce the same split and the global NumPy RNG is
    neither read nor modified.

    Parameters
    ----------
    length : int
        Total number of samples to split.
    test_size : float, default 0.3
        Fraction of samples assigned to the validation set; the validation
        count is ``int(length * test_size)`` (rounded towards zero).
    random_state : int, default 43
        Seed for the private random generator.

    Returns
    -------
    (train_choices, val_choices) : tuple of list of int
        ``val_choices`` is in the order it was sampled; ``train_choices``
        holds every remaining index in ascending order.
    """
    # Keep a handle on the seeded generator: constructing a RandomState and
    # discarding it (then calling np.random.choice) leaves the draw unseeded.
    rng = np.random.RandomState(seed=random_state)

    n_val = int(length * test_size)
    if n_val <= 0:
        # Nothing to hold out (covers length == 0 and tiny test_size values).
        return list(range(length)), []

    val_choices = rng.choice(length, n_val, replace=False).tolist()

    held_out = set(val_choices)
    # Build the training list by filtering the ordered range instead of
    # iterating a set, so its order is deterministic.
    train_choices = [idx for idx in range(length) if idx not in held_out]
    return train_choices, val_choices

In [10]:
encoder_input_data_2d.shape

(4462, 40)

In [11]:
encoder_input_data_2d[0]

array([25, 54, 54, 50,  1, 40, 59,  1, 47, 44, 57,  1, 45, 40, 42, 44,  7,
        1, 48, 59,  4, 58,  1, 40,  1, 62, 54, 53, 43, 44, 57, 45, 60, 51,
        1, 45, 40, 42, 44,  1])

In [12]:
def make_3d_one_hot(input_2d, num_tokens=None):
    """One-hot encode a 2-D array of integer token indices.

    Parameters
    ----------
    input_2d : array-like of int, shape (n_rows, n_cols)
        Token index for every (row, column) position.
    num_tokens : int, optional
        Size of the one-hot axis. When omitted, the notebook-level
        ``n_tokens`` global is used, which matches the previous behaviour.

    Returns
    -------
    numpy.ndarray, shape (n_rows, n_cols, num_tokens), dtype float64
        ``out[i, j, input_2d[i, j]] == 1`` and every other entry is 0.
    """
    if num_tokens is None:
        # Fall back to the vocabulary size defined earlier in the notebook.
        num_tokens = n_tokens

    input_2d = np.asarray(input_2d)
    n_rows, n_cols = input_2d.shape

    one_hot = np.zeros((n_rows, n_cols, num_tokens))
    # Broadcast a column of row numbers against a row of column numbers so that
    # together with `input_2d` they address exactly one cell per (row, col).
    row_idx = np.arange(n_rows)[:, np.newaxis]
    col_idx = np.arange(n_cols)[np.newaxis, :]
    one_hot[row_idx, col_idx, input_2d] = 1
    return one_hot

In [13]:
# Expand the 2-D index matrices into 3-D one-hot tensors
# (samples, timesteps, n_tokens), the input format of both `Input` layers.
encoder_input_data = make_3d_one_hot(encoder_input_data_2d)
decoder_input_data = make_3d_one_hot(decoder_input_data_2d)

In [14]:
encoder_input_data.shape

(4462, 40, 66)

In [15]:
decoder_input_data.shape

(4462, 41, 66)

In [16]:
decoder_target_data.shape

(4462, 41, 1)

In [17]:
# Partition the sample indices into training and validation index lists using
# the helper's defaults (30% of the samples go to validation).
train_set, val_set = train_valid_split(encoder_input_data.shape[0])

In [18]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, 66)      0                                            
____________________________________________________________________________________________________
input_2 (InputLayer)             (None, None, 66)      0                                            
____________________________________________________________________________________________________
lstm_1 (LSTM)                    [(None, 100), (None,  66800       input_1[0][0]                    
____________________________________________________________________________________________________
lstm_2 (LSTM)                    [(None, None, 100), ( 66800       input_2[0][0]                    
                                                                   lstm_1[0][1]            

In [19]:
# Note that `decoder_target_data` is NOT one-hot encoded: because the model is
# compiled with `sparse_categorical_crossentropy`, the targets are integer token
# indices with a trailing singleton axis, whereas `encoder_input_data` and
# `decoder_input_data` are one-hot tensors.
# `train_set` / `val_set` are index lists used to select rows from each array.
model.fit(
    [encoder_input_data[train_set], decoder_input_data[train_set]],
    decoder_target_data[train_set],
    validation_data=([encoder_input_data[val_set], decoder_input_data[val_set]], decoder_target_data[val_set]),
    batch_size=500,
    epochs=10
)

Train on 3124 samples, validate on 1338 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f41ed4837f0>

In [20]:
# Build the inference-time models from the layers used in the training model.
# Encoder: maps an input sequence to its final [hidden, cell] state pair.
encoder_model = Model(encoder_inputs, encoder_states)
# Decoder: the initial state is fed in explicitly through two extra inputs
# instead of being wired to the encoder.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
# Calling the same `decoder_lstm` / `decoder_dense` layer objects again reuses
# their weights. NOTE: this rebinds `decoder_outputs`, `state_h` and `state_c`,
# which previously referred to tensors of the training graph.
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
# Inputs: [decoder tokens, state_h, state_c]; outputs: [token probabilities,
# updated state_h, updated state_c].
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

In [21]:
# Write the two inference models to `.h5` files in the current working
# directory. The combined training `model` is not saved by this cell.
encoder_model.save('encoder_model.h5')
decoder_model.save('decoder_model.h5')