# Compose: Training a model to generate text

In [29]:
import os
import pickle
import numpy as np
#from music21 import note, chord

from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import plot_model
from keras.utils import np_utils
from keras.layers import LSTM, Input, Dropout, Dense, Activation, Embedding, Concatenate, Reshape
from keras.layers import Flatten, RepeatVector, Permute, TimeDistributed
from keras.layers import Multiply, Lambda, Softmax
import keras.backend as K 
from keras.models import Model
from keras.optimizers import RMSprop

## Set parameters

In [5]:
# run params
section = 'composetxt'
run_id = '0001'
txt_name = 'txtattn'

# Everything produced by this run lives under run/<section>/<run_id>_<txt_name>
run_folder = 'run/{}/'.format(section)
run_folder += '_'.join([run_id, txt_name])

store_folder = os.path.join(run_folder, 'store')
data_folder = os.path.join('data', txt_name)

# os.makedirs also creates the missing parents ('run/', 'run/<section>/'),
# which a plain os.mkdir would fail on, and exist_ok=True fills in any
# sub-folder that is missing when the run folder already exists.
for sub_folder in ('store', 'output', 'weights', 'viz'):
    os.makedirs(os.path.join(run_folder, sub_folder), exist_ok=True)

mode = 'build' # 'load' # 

# data params
intervals = range(1)
seq_len = 32

# model params
embed_size = 100
rnn_units = 256
use_attention = True

## Extract the text

In [7]:
import re

token_type = 'word'

# Load the text and perform some cleanup.

seq_length = 20

filename = "./data/aesop/data.txt"

# 'utf-8-sig' drops a leading byte-order mark if the file has one.
with open(filename, encoding='utf-8-sig') as f:
    text = f.read()

# Remove the text before and after the main stories.
# NOTE(review): str.find returns -1 when a marker is missing, in which case
# the slice below silently keeps the wrong span - confirm the markers exist
# in the data file being used.
start = text.find("THE FOX AND THE GRAPES\n\n\n")
end = text.find("ILLUSTRATIONS\n\n\n[")
text = text[start:end]

# A run of seq_length '|' tokens marks the start of every story.
start_story = '| ' * seq_length

text = start_story + text
text = text.lower()
# Five consecutive newlines are replaced by the start-of-story marker.
text = text.replace('\n\n\n\n\n', start_story)
text = text.replace('\n', ' ')
# Runs of two or more spaces become a sentence break.
text = re.sub(r'  +', '. ', text).strip()
text = text.replace('..', '.')

# Surround each punctuation character with spaces so it becomes its own
# token. Raw strings keep '\]' and '\s' from being read as (invalid) string
# escapes; the hyphen is escaped so it is a literal rather than a range.
text = re.sub(r'([!"#$%&()*+,\-./:;<=>?@\[\]^_`{|}~])', r' \1 ', text)
# Collapse the whitespace runs introduced above into single spaces.
text = re.sub(r'\s{2,}', ' ', text)

len(text)

213694

In [13]:
from keras.preprocessing.text import Tokenizer

# Word-level tokens when token_type is 'word'; any other value selects
# character-level tokens without lower-casing. No characters are filtered.
tokenizer = (
    Tokenizer(filters='', char_level=False)
    if token_type == 'word'
    else Tokenizer(filters='', lower=False, char_level=True)
)

tokenizer.fit_on_texts([text])

# One more than the number of distinct tokens in the fitted vocabulary.
total_words = 1 + len(tokenizer.word_index)

# The whole corpus was passed as a single text, so take the first sequence.
token_list = tokenizer.texts_to_sequences([text])[0]

print(total_words)

print(str(tokenizer.word_index)[:50]) # vocabulary
print(token_list[:50]) # tokenized text

4170
{'|': 1, ',': 2, 'the': 3, 'and': 4, '.': 5, 'a': 
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 56, 4, 3, 940, 5, 6, 382, 56, 94, 77, 216, 1557, 9, 940, 941, 62, 6, 581, 20, 12, 2226, 162, 6, 359, 2227, 2, 4, 158, 11]


In [None]:
def generate_sequences(token_list, step):
    """Cut a token list into fixed-length windows and next-token targets.

    Reads the module-level ``seq_length`` (window length) and
    ``total_words`` (number of one-hot classes).

    Args:
        token_list: sequence of integer token ids.
        step: distance between the starts of consecutive windows.

    Returns:
        A tuple ``(X, y, num_seq)`` where ``X`` is the list of windows,
        ``y`` is the result of one-hot encoding the token following each
        window, and ``num_seq`` is ``len(X)``.
    """
    window_starts = range(0, len(token_list) - seq_length, step)

    X = [token_list[start: start + seq_length] for start in window_starts]
    next_tokens = [token_list[start + seq_length] for start in window_starts]

    y = np_utils.to_categorical(next_tokens, num_classes=total_words)

    num_seq = len(X)
    print('Number of sequences:', num_seq, "\n")

    return X, y, num_seq

# Window length and stride used to cut the token stream into samples.
seq_length = 20
step = 1

X, y, num_seq = generate_sequences(token_list, step)

# Convert both results to NumPy arrays.
X, y = np.array(X), np.array(y)

## Prepare network I/O

In [19]:
def prepare_sequences(text, n_tokens, seq_len =32):
    """Build the next-token training pairs fed to the network.

    Each run of ``seq_len`` consecutive items of ``text`` becomes one input
    row, and the item immediately after it becomes that row's target.

    Args:
        text: sequence of integer token ids.
        n_tokens: number of classes used for the one-hot targets.
        seq_len: length of each input window.

    Returns:
        A tuple ``(network_input, network_output)``; each is a one-element
        list holding, respectively, the ``(n_patterns, seq_len)`` input
        array and the one-hot encoded targets.
    """
    offsets = range(len(text) - seq_len)

    # Input windows and the token that follows each of them.
    windows = [text[k:k + seq_len] for k in offsets]
    next_tokens = [text[k + seq_len] for k in offsets]

    # One row per window.
    text_network_input = np.reshape(windows, (len(windows), seq_len))
    text_network_output = np_utils.to_categorical(next_tokens, num_classes=n_tokens)

    return ([text_network_input], [text_network_output])

In [21]:
#network_input, network_output = prepare_sequences(notes, durations, lookups, distincts, seq_len)
# Build the training pairs from the tokenized corpus.
network_input, network_output = prepare_sequences(
    text=token_list,
    n_tokens=len(tokenizer.word_index) + 1,
    seq_len=seq_len,
)

In [22]:
# Show the first (and only) array of the inputs and of the targets.
print('text input', network_input[0], sep='\n')

print('text output', network_output[0], sep='\n')

text input
[[   1    1    1 ...   94   77  216]
 [   1    1    1 ...   77  216 1557]
 [   1    1    1 ...  216 1557    9]
 ...
 [  13    2    8 ...  384    5    8]
 [   2    8   53 ...    5    8    5]
 [   8   53   22 ...    8    5 4169]]
text output
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 0.]]


## Create the structure of the neural network

In [27]:
def create_network(n_tokens, embed_size = 100, rnn_units = 256, use_attention = False):
    """Build and compile the next-token prediction network.

    Token ids pass through an Embedding and an LSTM that returns the full
    sequence. With ``use_attention`` a second sequence-returning LSTM follows
    and its outputs are combined by a weighted sum over axis 1, the weights
    coming from a softmax over one tanh score per step. Without it, a final
    LSTM produces the vector directly. A softmax Dense layer over
    ``n_tokens`` gives the output.

    Args:
        n_tokens: vocabulary size (Embedding input size and output width).
        embed_size: width of the token embedding.
        rnn_units: number of units in every LSTM layer.
        use_attention: whether to use the attention branch.

    Returns:
        A tuple ``(model, att_model)``. ``model`` is compiled with
        categorical cross-entropy and RMSprop; ``att_model`` maps the same
        input to the attention weights and is ``None`` when
        ``use_attention`` is false.
    """
    token_ids = Input(shape=(None,))

    embedded = Embedding(n_tokens, embed_size)(token_ids)

    hidden = LSTM(rnn_units, return_sequences=True)(embedded)

    if use_attention:
        hidden = LSTM(rnn_units, return_sequences=True)(hidden)

        # One tanh score per step, flattened and normalised with a softmax.
        scores = Dense(1, activation='tanh')(hidden)
        scores = Reshape([-1])(scores)
        alpha = Activation('softmax')(scores)

        # Repeat the weights rnn_units times and swap the two axes so they
        # can be multiplied element-wise with the LSTM outputs.
        # todo: check the 2, 1
        repeated = RepeatVector(rnn_units)(alpha)
        alpha_repeated = Permute([2, 1])(repeated)

        weighted = Multiply()([hidden, alpha_repeated])
        context = Lambda(lambda xin: K.sum(xin, axis=1), output_shape=(rnn_units,))(weighted)
    else:
        context = LSTM(rnn_units)(hidden)

    text_out = Dense(n_tokens, activation='softmax', name='text')(context)

    model = Model([token_ids], [text_out])
    # The attention model only exists when the attention branch was built.
    att_model = Model([token_ids], alpha) if use_attention else None

    optimizer = RMSprop(lr=0.001)
    model.compile(loss=['categorical_crossentropy'], optimizer=optimizer)

    return model, att_model

In [30]:
#model, att_model = create_network(n_tokens, embed_size, rnn_units, use_attention)
# Build the network with the parameters chosen at the top of the notebook.
model, att_model = create_network(
    n_tokens=len(tokenizer.word_index) + 1,
    embed_size=embed_size,
    rnn_units=rnn_units,
    use_attention=use_attention,
)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 100)    417000      input_2[0][0]                    
__________________________________________________________________________________________________
lstm_3 (LSTM)                   (None, None, 256)    365568      embedding_2[0][0]                
__________________________________________________________________________________________________
lstm_4 (LSTM)                   (None, None, 256)    525312      lstm_3[0][0]                     
____________________________________________________________________________________________

## Train the neural network

In [35]:
# Folder inside the run folder that holds the model weight files.
weights_folder = os.path.join(run_folder, 'weights')
# Uncomment to load the weights stored in "weights.h5" instead of training from scratch.
# model.load_weights(os.path.join(weights_folder, "weights.h5"))

run/composetxt/0001_txtattn
run/composetxt/0001_txtattn\weights\weights.h5


In [38]:
# Folder inside the run folder that holds the model weight files.
weights_folder = os.path.join(run_folder, 'weights')

# NOTE(review): both checkpoints and early stopping monitor the training
# 'loss' even though fit() holds out validation data - confirm this is
# intended rather than 'val_loss'.

# Writes a separately named file (epoch and loss in the name) whenever the
# monitored loss improves.
checkpoint1 = ModelCheckpoint(
    filepath=os.path.join(weights_folder, "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.h5"),
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)

# Writes to the same "weights.h5" file whenever the monitored loss improves.
checkpoint2 = ModelCheckpoint(
    filepath=os.path.join(weights_folder, "weights.h5"),
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)

early_stopping = EarlyStopping(
    monitor='loss',
    patience=10,
    restore_best_weights=True,
)

callbacks_list = [checkpoint1, checkpoint2, early_stopping]

# Save the current weights before training starts.
model.save_weights(os.path.join(weights_folder, "weights.h5"))
model.fit(
    x=network_input,
    y=network_output,
    batch_size=32,
    epochs=10,  # 200 epochs
    validation_split=0.2,
    shuffle=True,
    callbacks=callbacks_list,
)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 40309 samples, validate on 10078 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x249bc1ecb70>