In [29]:
from theano.sandbox import cuda

In [30]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

In [31]:
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
text = open(path).read()
print('corpus length:', len(text))

corpus length: 600901


In [32]:
chars = sorted(list(set(text)))
vocab_size = len(chars)+1
print('total chars:', vocab_size)

total chars: 86


In [33]:
chars.insert(0, "\0")

In [34]:
''.join(chars[1:-6])

'\n !"\'(),-.0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz'

In [35]:
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

In [36]:
idx = [char_indices[c] for c in text]

In [37]:
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [38]:
''.join(indices_char[i] for i in idx[:70])

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not gro'

In [39]:
cs=3
c1_dat = [idx[i] for i in xrange(0, len(idx)-1-cs, cs)]
c2_dat = [idx[i+1] for i in xrange(0, len(idx)-1-cs, cs)]
c3_dat = [idx[i+2] for i in xrange(0, len(idx)-1-cs, cs)]
c4_dat = [idx[i+3] for i in xrange(0, len(idx)-1-cs, cs)]

In [40]:
x1 = np.stack(c1_dat[:-2])
x2 = np.stack(c2_dat[:-2])
x3 = np.stack(c3_dat[:-2])

In [41]:
y = np.stack(c4_dat[:-2])

In [42]:
x1[:4], x2[:4], x3[:4]

(array([40, 30, 29,  1]), array([42, 25,  1, 43]), array([29, 27,  1, 45]))

In [43]:
y[:4]

array([30, 29,  1, 40])

In [44]:
x1.shape, y.shape

((200297,), (200297,))

In [45]:
n_fac = 42

In [46]:
def embedding_input(name, n_in, n_out):
    inp = Input(shape=(1,), dtype='int64', name=name)
    emb = Embedding(n_in, n_out, input_length=1)(inp)
    return inp, Flatten()(emb)

In [47]:
c1_in, c1 = embedding_input('c1', vocab_size, n_fac)
c2_in, c2 = embedding_input('c2', vocab_size, n_fac)
c3_in, c3 = embedding_input('c3', vocab_size, n_fac)

In [48]:
n_hidden = 256

In [49]:
dense_in = Dense(n_hidden, activation='relu')

In [50]:
c1_hidden = dense_in(c1)

In [51]:
dense_hidden = Dense(n_hidden, activation='tanh')

In [52]:
c2_dense = dense_in(c2)
hidden_2 = dense_hidden(c1_hidden)
c2_hidden = merge([c2_dense, hidden_2])

In [53]:
c3_dense = dense_in(c3)
hidden_3 = dense_hidden(c2_hidden)
c3_hidden = merge([c3_dense, hidden_3])

In [54]:
dense_out = Dense(vocab_size, activation='softmax')

In [55]:
c4_out = dense_out(c3_hidden)

In [56]:
model = Model([c1_in, c2_in, c3_in], c4_out)

In [57]:
model.compile(loss='sparse_categorical_crossentropy', optimizer = Adam())

In [58]:
model.optimizer.lr = 0.000001

In [59]:
model.fit([x1,x2,x3], y, batch_size=64, nb_epoch=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7fa5d72f6590>

In [69]:
model.optimizer.lr = 0.01
model.fit([x1,x2,x3], y, batch_size=64, nb_epoch=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7fa5ce3d1090>

In [62]:
def get_next(inp):
    idxs = [char_indices[c] for c in inp]
    arrs = [np.array(i)[np.newaxis] for i in idxs]
    p = model.predict(arrs)
    i = np.argmax(p)
    return chars[i]

In [70]:
get_next(' th')

' '

In [65]:
get_next(' an')

' '

In [71]:
# FIRST RNN
cs = 8
c_in_dat = [[idx[i+n] for i in xrange(0, len(idx)-1-cs, cs)] for n in range(cs)]

In [72]:
c_out_dat = [idx[i+cs] for i in xrange(0, len(idx)-1-cs, cs)]

In [73]:
xs = [np.stack(c[:-2]) for c in c_in_dat]

In [74]:
len(xs), xs[0].shape

(8, (75110,))

In [75]:
y = np.stack(c_out_dat[:-2])

In [76]:
[xs[n][:cs] for n in range(cs)]

[array([40,  1, 33,  2, 72, 67, 73,  2]),
 array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67])]

In [77]:
y[:cs]

array([ 1, 33,  2, 72, 67, 73,  2, 68])

In [78]:
# Number of latent factors
n_fac = 42

In [80]:
def embedding_input(name, n_in, n_out):
    inp = Input(shape=(1,), dtype='int64', name=name+'_in')
    emb = Embedding(n_in, n_out, input_length=1, name=name+'_emb')(inp)
    return inp, Flatten()(emb)

In [81]:
c_ins = [embedding_input('c'+str(n), vocab_size, n_fac) for n in range(cs)]

In [82]:
n_hidden = 256

In [83]:
dense_in = Dense(n_hidden, activation='relu')
dense_hidden = Dense(n_hidden, activation='relu', init='identity')
dense_out = Dense(vocab_size, activation='softmax')

In [84]:
hidden= dense_in(c_ins[0][1])

In [86]:
for i in range(1, cs):
    c_dense = dense_in(c_ins[i][1])
    hidden = dense_hidden(hidden)
    hidden = merge([c_dense, hidden])

In [87]:
c_out = dense_out(hidden)

In [88]:
model = Model([c[0] for c in c_ins], c_out)
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())

In [89]:
model.fit(xs, y, batch_size=64, nb_epoch=12)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7fa5cb8e5890>

In [90]:
def get_next(inp):
    idxs = [np.array(char_indices[c])[np.newaxis] for c in inp]
    p = model.predict(idxs)
    return chars[np.argmax(p)]

In [91]:
get_next('for thos')

'e'

In [93]:
get_next('queens o')

'f'

In [96]:
get_next('part of ')

't'

In [98]:
n_hidden, n_fac, cs, vocab_size = (256, 42, 8, 86)

In [99]:
model = Sequential([
        Embedding(vocab_size, n_fac, input_length=cs),
        SimpleRNN(n_hidden, activation='relu', inner_init='identity'),
        Dense(vocab_size, activation='softmax')
    ])

In [100]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_7 (Embedding)          (None, 8, 42)         3612        embedding_input_1[0][0]          
____________________________________________________________________________________________________
simplernn_1 (SimpleRNN)          (None, 256)           76544       embedding_7[0][0]                
____________________________________________________________________________________________________
dense_9 (Dense)                  (None, 86)            22102       simplernn_1[0][0]                
Total params: 102258
____________________________________________________________________________________________________


In [101]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())

In [102]:
model.fit(np.stack(xs,1), y, batch_size=64, nb_epoch=8)

Exception: Error when checking model input: expected embedding_input_1 to have 2 dimensions, but got array with shape (75110, 8, 1)