In [204]:
from keras.utils.data_utils import get_file
import numpy as np
from keras.layers import Embedding, \
    Input, \
    Dense, \
    Flatten, \
    merge, \
    SimpleRNN, \
    TimeDistributed
from keras.models import Model, \
    Sequential
from keras.optimizers import Adam

### Get Data

In [210]:
# get_file returns the local path of the corpus file fetched from `origin`.
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
# Read through a context manager so the file handle is closed as soon as the
# text is in memory instead of lingering for the life of the kernel.
with open(path) as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))

('corpus length:', 600901)


In [211]:
# Vocabulary: every distinct character of the corpus in sorted order, with the
# NUL character placed at index 0.
chars = ["\0"] + sorted(set(text))
# Lookup tables in both directions: index -> character and character -> index.
idx_chars = dict(enumerate(chars))
chars_idx = {symbol: position for position, symbol in enumerate(chars)}
vocab_size = len(chars)

In [4]:
# Window length and stride: each training example is cs consecutive characters
# (three inputs followed by the target) and windows start every cs characters.
cs = 4
# All four lists must walk the SAME window starts. Previously char_1 started at
# 0 while the others started at 1, so every example skipped text[i + 1] and the
# "three previous characters" were not actually consecutive.
window_starts = range(0, len(text) - cs - 1, cs)
char_1 = [chars_idx[text[i]] for i in window_starts]
char_2 = [chars_idx[text[i + 1]] for i in window_starts]
char_3 = [chars_idx[text[i + 2]] for i in window_starts]
char_4 = [chars_idx[text[i + 3]] for i in window_starts]

### Prepare Data

In [37]:
# Size of each character embedding vector (passed as output_dim to Embedding).
n_fac = 42

In [38]:
# Turn the four index lists into NumPy arrays: x1..x3 are the model inputs
# (first, second, third character) and y is the target (fourth character).
x1, x2, x3, y = map(np.stack, (char_1, char_2, char_3, char_4))

### Next char from 3 chars

In [39]:
def embedding_input(name):
    """Create a one-character input and its flattened embedding.

    Args:
        name: Name given to the Keras Input layer.

    Returns:
        A pair ``(input_tensor, flat_embedding)``: the Input of shape ``(1,)``
        and the result of passing it through an Embedding
        (``vocab_size`` -> ``n_fac``) followed by Flatten.
    """
    char_input = Input(shape=(1,), name=name)
    embedding_layer = Embedding(input_dim=vocab_size, output_dim=n_fac, input_length=1)
    flattened = Flatten()(embedding_layer(char_input))
    return char_input, flattened

In [40]:
# One (input, flattened embedding) pair per character position, created in
# order char1, char2, char3.
(x1_in, x1_emb), (x2_in, x2_emb), (x3_in, x3_emb) = map(
    embedding_input, ('char1', 'char2', 'char3'))

In [41]:
# Two Dense layers that are created once and then applied at several character
# positions in the wiring cell below, so the same layer object is reused:
#   dense_in     - applied to each character embedding
#   dense_hidden - applied to the running hidden state
dense_in = Dense(256, activation='tanh')
dense_hidden = Dense(256, activation='tanh')

In [42]:
# Hand-unrolled three-step recurrence built from the two reused layers:
#   state after char 1 = dense_in(char 1)
#   state after char k = merge([dense_hidden(previous state), dense_in(char k)])
# NOTE(review): merge() is called without a mode argument, so the combination
# is whatever the installed Keras 1.x default is (presumably element-wise sum)
# - confirm against the Keras version in use.
c1_hidden = dense_in(x1_emb)
hidden_2 = dense_hidden(c1_hidden)
c2_dense = dense_in(x2_emb)
c2_hidden = merge([hidden_2, c2_dense])
hidden_3 = dense_hidden(c2_hidden)
c3_dense = dense_in(x3_emb)
c3_hidden = merge([hidden_3, c3_dense])
# Softmax over the vocabulary: predicted distribution for the fourth character.
c4_out = Dense(vocab_size, activation='softmax')((c3_hidden))

In [43]:
# Functional model: three single-character index inputs -> softmax over the
# vocabulary for the next character.
model = Model([x1_in, x2_in, x3_in], c4_out)

In [44]:
# Targets are integer character indices (not one-hot), hence the sparse loss.
# NOTE(review): lr=0.00001 is two orders of magnitude below the 0.001 used by
# the later models in this notebook - confirm this is intentional.
model.compile(optimizer=Adam(lr=0.00001), loss='sparse_categorical_crossentropy')

In [45]:
# Print the layer-by-layer table (output shapes, parameter counts, connections).
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
char1 (InputLayer)               (None, 1)             0                                            
____________________________________________________________________________________________________
char2 (InputLayer)               (None, 1)             0                                            
____________________________________________________________________________________________________
char3 (InputLayer)               (None, 1)             0                                            
____________________________________________________________________________________________________
embedding_13 (Embedding)         (None, 1, 42)         3612        char1[0][0]                      
___________________________________________________________________________________________

In [46]:
# Train for a single epoch; the input arrays are listed in the same order as
# the Model's inputs (char1, char2, char3) and y holds the fourth character.
model.fit([x1, x2, x3], y, nb_epoch=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x7f8d489be150>

### Next char from previous n chars

In [135]:
# Context length: number of preceding characters used to predict the next one.
num_prev_chars = 8
# Number of units in the hidden layers (Dense here, SimpleRNN further down).
n_hidden = 256

In [136]:
# Encode the corpus once, then slice windows out of the encoded list. This
# avoids repeating the dictionary lookup for every window a character appears
# in, and avoids using `cs` as a comprehension variable: in Python 2 that name
# leaks out of the comprehension and overwrites the window constant `cs`
# defined in the three-character section.
text_idx = [chars_idx[char] for char in text]
# Row k holds the indices of text[k : k + num_prev_chars] (stride 1, overlapping).
c_in_list = [text_idx[start:start + num_prev_chars]
             for start in range(len(text_idx) - num_prev_chars)]
# Target for row k is the character immediately after its window.
y_list = text_idx[num_prev_chars:]

In [137]:
# c_in: one row per window, one column per character position.
# y:    the index of the character that follows each window.
c_in, y = (np.stack(sequence) for sequence in (c_in_list, y_list))

In [138]:
# Split c_in into one 1-D array per character position (its columns), the
# format expected by the multi-input functional model.
chk = list(c_in.T)

In [139]:
# Layers reused across all character positions of the unrolled n-char model:
#   dense_in     - character embedding -> hidden (ReLU)
#   dense_hidden - hidden -> hidden (tanh), created with init='identity'
#   dense_out    - hidden -> softmax over the vocabulary
dense_in = Dense(n_hidden, activation='relu')
dense_hidden = Dense(n_hidden, activation='tanh', init='identity')
dense_out = Dense(vocab_size, activation='softmax')

In [140]:
# Collect one Input layer and one flattened embedding per character position;
# the inputs are named char_1 .. char_<num_prev_chars>.
inp, emb = [], []
for char_num in range(num_prev_chars):
    ci_in, ci_emb = embedding_input('char_%d' % (char_num + 1))
    inp += [ci_in]
    emb += [ci_emb]

In [141]:
# Per-position bookkeeping lists (index = 0-based character position).
ci_dense = num_prev_chars*[None]       # dense_in(embedding); slot 0 is never filled
ci_hidden = num_prev_chars*[None]      # hidden state after each position
ci_hidden_inp = num_prev_chars*[None]  # dense_hidden(previous state); slot 0 is never filled
# The first position has no previous state: its hidden state is dense_in of its embedding.
ci_hidden[0] = dense_in(emb[0])
for char_num in range(1, num_prev_chars):
    # New state = merge of (this character through dense_in) and
    # (previous state through dense_hidden); merge() uses its default mode.
    ci_dense[char_num] = dense_in(emb[char_num])
    ci_hidden_inp[char_num] = dense_hidden(ci_hidden[char_num - 1])
    ci_hidden[char_num] = merge([ci_dense[char_num], ci_hidden_inp[char_num]])
# Only the hidden state after the last character feeds the output layer.
c_out = dense_out(ci_hidden[num_prev_chars - 1])

In [142]:
# Functional model with num_prev_chars inputs and one softmax output.
# Rebinds the name `model`, replacing the three-character model.
model = Model(inp, c_out)

In [143]:
# Integer class targets -> sparse categorical cross-entropy; Adam with lr=0.001.
model.compile(optimizer=Adam(lr=0.001), loss='sparse_categorical_crossentropy')

In [144]:
# chk is the list of per-position index arrays (one per model input);
# batch_size is not given, so the library default is used.
model.fit(chk, y, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x7f8d2492c2d0>

### RNN in Keras

In [147]:
# Hyperparameters for the RNN section gathered in one statement. vocab_size is
# assigned to itself, so it keeps the value computed from the corpus; the other
# three values match the ones set earlier in the notebook.
n_fac, vocab_size, n_hidden, num_prev_chars = (42, vocab_size, 256, 8)

In [153]:
# Same architecture as the hand-unrolled network, expressed with Keras'
# recurrent layer: embedding -> SimpleRNN -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=n_fac, input_length=num_prev_chars))
model.add(SimpleRNN(n_hidden))
model.add(Dense(vocab_size, activation='softmax'))

In [154]:
# Integer class targets -> sparse categorical cross-entropy; Adam with lr=0.001.
model.compile(optimizer=Adam(lr=0.001), loss='sparse_categorical_crossentropy')

In [156]:
# The Sequential model has a single input, so the whole 2-D index array c_in
# (one row per window) is passed directly instead of the per-column list.
model.fit(c_in, y, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x7f8d250c5d50>

In [160]:
def get_next_keras(inp):
    """Predict the character that follows ``inp`` using the global ``model``.

    Args:
        inp: String of seed characters; every character must be a key of
            ``chars_idx``.

    Returns:
        The character whose index has the highest predicted probability.
    """
    encoded = np.array([chars_idx[ch] for ch in inp])
    # Add a leading batch axis: the model is given a batch of one sequence.
    batch = encoded[np.newaxis, :]
    probabilities = model.predict(batch)[0]
    best_index = np.argmax(probabilities)
    return idx_chars[best_index]

In [161]:
# The seed string is 8 characters long, matching num_prev_chars (the
# input_length of the model's Embedding layer).
get_next_keras('queens a')

'n'

### Return Sequences in Keras

In [212]:
# Same hyperparameter statement as in the "RNN in Keras" section: vocab_size is
# assigned to itself (unchanged); n_fac, n_hidden and num_prev_chars are set to
# 42, 256 and 8.
n_fac, vocab_size, n_hidden, num_prev_chars = (42, vocab_size, 256, 8)

In [220]:
# Inputs: every window except the last one.
c_in_seq = c_in[:-1]
# Targets: the following window, i.e. the input shifted one character ahead,
# with a trailing axis added so there is one integer label per time step.
c_out_seq = np.atleast_3d(c_in[1:])

In [221]:
# Sequence-to-sequence variant: the RNN emits its hidden state at every time
# step and the same softmax layer is applied to each of them.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=n_fac, input_length=num_prev_chars))
model.add(SimpleRNN(n_hidden, activation='tanh', inner_init='identity', return_sequences=True))
model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))

In [222]:
# Print the layer table; the output shapes keep the time axis of length 8
# because the RNN returns a full sequence.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_72 (Embedding)         (None, 8, 42)         3612        embedding_input_6[0][0]          
____________________________________________________________________________________________________
simplernn_6 (SimpleRNN)          (None, 8, 256)        76544       embedding_72[0][0]               
____________________________________________________________________________________________________
timedistributed_4 (TimeDistribut (None, 8, 86)         22102       simplernn_6[0][0]                
Total params: 102,258
Trainable params: 102,258
Non-trainable params: 0
____________________________________________________________________________________________________


In [223]:
# Integer class targets -> sparse categorical cross-entropy; Adam with lr=0.001.
model.compile(optimizer=Adam(lr=0.001), loss='sparse_categorical_crossentropy')

In [224]:
# Train for one epoch on (window, next window) pairs; c_out_seq carries a
# trailing axis of length 1 (added by np.atleast_3d) holding the integer label
# for each time step.
model.fit(c_in_seq, c_out_seq, nb_epoch=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x7f8cff931f90>