# Create a RNN model to text generation
- RNN model at character level
    - Input: n character previous
    - Output: next character
    - Model LSTM
- Use 'El Quijote' to train the generator


In [1]:
# Header
path_base = '/Users/jorge/'
#path_base = '/home/jorge/'

path = path_base + 'data/training/keras/'


import numpy as np
import theano

Using gpu device 0: GeForce GT 750M (CNMeM is disabled, cuDNN 5005)


## Download data and generate sequences

In [2]:
#Download quijote from guttenberg project
# wget http://www.gutenberg.org/ebooks/996.txt.utf-8
    

In [3]:
text = open(path + "996.txt.utf-8").read().lower()
print('corpus length:', len(text))

chars = set(text)
print('total chars:', len(chars))

#Dictionaries to convert char to num & num to char
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))


('corpus length:', 2347796)
('total chars:', 63)


In [4]:
# cut the text in semi-redundant sequences of maxlen characters
# One sentence of length 20 for each 3 characters
maxlen = 20
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

('nb sequences:', 782592)


## Train the model

In [5]:
'''
X: One row by sentence
    in each row a matrix of bool 0/1 of dim length_sentence x num_chars coding the sentence. Dummy variables
y: One row by sentence
    in each row a vector of bool of lengt num_chars with 1 in the next char position
'''

print('Vectorization...')
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

print('X shape: ',X.shape)
print('y shape: ',y.shape)

Vectorization...
('X shape: ', (782592, 20, 63))
('y shape: ', (782592, 63))


In [6]:
# build the model: 2 stacked LSTM
from keras.models import Model
from keras.layers import Input, Dense, Dropout, LSTM


print('Build model 1')
seq_prev_input = Input(shape=(maxlen, len(chars)), name='prev') 
                
# apply forwards LSTM
forwards1 = LSTM(512, return_sequences=True)(seq_prev_input)
dp1 = Dropout(0.25)(forwards1)

forwards2 = LSTM(512, return_sequences=False)(dp1)
dp2 = Dropout(0.5)(forwards2)

output = Dense(len(chars), activation='softmax')(dp2)

model1 = Model(input=seq_prev_input, output=output)

# try using different optimizers and different optimizer configs
model1.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])


Build model 1


Using Theano backend.


In [7]:
#Fit model
model1.fit(X[:600000], y[:600000], batch_size=256, nb_epoch=30,
           validation_data=(X[600000:], y[600000:]))



Train on 600000 samples, validate on 182592 samples
Epoch 1/30
 12800/600000 [..............................] - ETA: 1635s - loss: 3.1993 - acc: 0.1416

KeyboardInterrupt: 

In [None]:
#Save model
model_name = 'text_generation_model1'

json_string = model1.to_json()
open(path + 'models/mdl_' + model_name + '.json', 'w').write(json_string)
model1.save_weights(path + 'models/w_' + model_name + '.h5')


## Evaluate model

In [None]:
# Load model
from keras.models import model_from_json

model_name = 'text_generation_model1'

model1 = model_from_json(open(path + 'models/mdl_' + model_name + '.json').read())
model1.load_weights(path + 'models/w_' + model_name + '.h5')

In [None]:
def sample(a, diversity=1.0):
    '''
    helper function to sample an index from a probability array
    - Diversity control the level of randomless
    '''
    a = np.log(a) / diversity
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))


def generate_text(sentence, diversity, current_model, num_char=400):
    sentence_init = sentence
    generated = ''
    for i in range(400):
        x = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x[0, t, char_indices[char]] = 1.
        preds = current_model.predict(x, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = indices_char[next_index]
        generated += next_char
        sentence = sentence[1:] + next_char
    print('\n\nDIVERSITY: ',diversity,'\n')
    print(sentence_init + generated)

sentence = 'mire vuestra merced '
generate_text(sentence, 0.2, model)
generate_text(sentence, 0.5, model)
generate_text(sentence, 1,   model)
generate_text(sentence, 1.2, model)


In [12]:
# Build a second model more complex
# - 2 stacked LSTM. More hidden and more regularization.
# - Direc connetions
from keras.models import Model
from keras.layers import Input, Dense, Dropout, LSTM, Merge




print('Build model 2')
seq_prev_input = Input(shape=(maxlen, len(chars)), name='prev') 
                
# apply forwards LSTM
forwards1 = LSTM(1024, return_sequences=True)(seq_prev_input)
dp_forwards1 = Dropout(0.5)(forwards1)

# Combine input with output of first recurrent layer as input od second recurrent layer
merge1 = Merge([seq_prev_input, forwards1], mode='concat', concat_axis=-1)

# Second forwards layer
forwards2 = LSTM(512, return_sequences=False)(merge1)
dp_forwards2 = Dropout(0.5)(forwards2)

# Combine output of the 2 recurrent layers.
# Select the last sequence output of the first LSTM layer
merge2 = Merge([dp_forwards1[:,-1,:], dp_forwards2], mode='concat', concat_axis=-1)

output = Dense(len(chars), activation='softmax')(merge2)

model2 = Model(input=seq_prev_input, output=output)

# try using different optimizers and different optimizer configs
model2.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

#Pending connect direct inputs to direct outputs ans show graph




Build model 2


AttributeError: 'TensorVariable' object has no attribute 'get_output_shape_at'

## Evaluate results over time
- Interactive training


In [None]:
#Excution generating a sample of output each 5 iterations and with different diversity

for iteration in range(1, 30):
    model2.fit(X, y, batch_size=128, nb_epoch=1)
    if iteration in (1,5,10,15,20,25,30):
        start_index = random.randint(0, len(text) - maxlen - 1)
        sentence = text[start_index: start_index + maxlen]
        print()
        print('-' * 50)
        print('Iteration: ', iteration)
        print('Seed: ', sentence)
        generate_text(sentence, 0.2, model2)
        generate_text(sentence, 0.5, model2)
        generate_text(sentence, 1,   model2)
        generate_text(sentence, 1.2, model2)
