In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import os
# Prefix prepended to every name read from the file.
start_token = " "

# The file holds one name per line. Splitting the whole text and skipping empty
# pieces works whether or not the file ends with a newline (slicing off the last
# character would truncate the final name when it does not), and it keeps blank
# lines from turning into names that consist of the start token only.
with open("names") as f:
    names = [start_token + line for line in f.read().split('\n') if line]

In [None]:
# Length of the longest name, counting the start token that was prepended to it.
MAX_LENGTH = max(len(name) for name in names)
print("max length =", MAX_LENGTH)

# plt.title('Sequence length distribution')
# plt.hist(list(map(len,names)),bins=25);

In [None]:
# Flat list of every character occurrence across all names (start token included).
src = [tok for name in names for tok in name]

# Vocabulary of distinct characters. It is sorted so that the position of each
# token (and therefore its integer id) is the same on every run; the iteration
# order of a set of strings is not stable across interpreter restarts.
tokens = sorted(set(src))

n_tokens = len(tokens)
print ('n_tokens = ',n_tokens)

# Guard: the vocabulary is expected to contain between 51 and 59 distinct characters.
assert 50 < n_tokens < 60

In [None]:
# Map every token to its position in `tokens`. enumerate() builds the mapping in
# a single pass instead of rescanning the list with list.index() for each token.
token_to_id = {tok: idx for idx, tok in enumerate(tokens)}

In [None]:
# The mapping must cover the vocabulary exactly once per token.
assert len(token_to_id) == len(tokens), "dictionaries must have same size"

# Each token's id has to equal its index in the `tokens` list.
for position in range(n_tokens):
    token = tokens[position]
    assert token_to_id[token] == position, "token identifier must be it's position in tokens list"

print("Seems alright!")

In [None]:
def to_matrix(names,max_len=None,pad=0,dtype='int32'):
    """Encode a list of names as a padded matrix of token ids.

    Each name is converted character by character through the module-level
    ``token_to_id`` mapping and written into its own row; positions after the
    end of a name are filled with ``pad``. The matrix is transposed before it
    is returned, so the result is indexed as ``[position, name]``.

    Args:
        names: sequence of strings to encode.
        max_len: number of positions per name. When falsy (``None`` or ``0``)
            the length of the longest name is used (``0`` for an empty list).
            Names longer than ``max_len`` are truncated to fit.
        pad: id stored in positions past the end of a name.
        dtype: dtype of the returned array.

    Returns:
        ``numpy.ndarray`` of shape ``(max_len, len(names))`` with dtype ``dtype``.

    Raises:
        KeyError: if a name contains a character missing from ``token_to_id``.
    """
    # default=0 keeps an empty batch from raising inside max().
    max_len = max_len or max(map(len, names), default=0)

    # np.full honours the requested dtype regardless of the type of `pad`.
    names_ix = np.full([len(names), max_len], pad, dtype=dtype)

    for i, name in enumerate(names):
        # Truncate so that an over-long name cannot overflow its row.
        name_ix = [token_to_id[tok] for tok in name[:max_len]]
        names_ix[i, :len(name_ix)] = name_ix

    return names_ix.T

In [None]:
import keras
from keras.layers import Concatenate,Dense,Embedding,Input
from keras.models import Model
from keras import regularizers

# Sizes of the hidden state and of the character embedding vectors.
rnn_num_units = 20
embedding_size = 8

# Embedding layer that turns character ids into vectors of size `embedding_size`.
embed_x = Embedding(n_tokens, embedding_size)

# Recurrent transition: maps the concatenated [embedding, hidden state] vector
# to the next hidden state, [x_t, h_t] -> h_t+1.
random_array = np.random.randn(rnn_num_units + embedding_size, rnn_num_units)
weights_0 = tf.Variable(initial_value=random_array.astype(np.float32))
b_0 = tf.Variable(initial_value=np.zeros(rnn_num_units, dtype=np.float32))

# Output projection: maps a hidden state to one score per token.
weights_1 = tf.Variable(initial_value=np.random.randn(rnn_num_units, n_tokens).astype(np.float32))
b_1 = tf.Variable(initial_value=np.zeros(n_tokens, dtype=np.float32))

# get_h_next = Dense(rnn_num_units, activation='tanh', kernel_regularizer=regularizers.l2(0.01))


#a dense layer that maps current hidden state to probabilities of characters [h_t+1]->P(x_t+1|h_t+1)
# get_probas = Dense(n_tokens, activation='softmax', kernel_regularizer=regularizers.l2(0.01))


In [None]:
from keras.layers.merge import concatenate

def rnn_one_step(x_t, h_t):
    """Advance the recurrent cell by a single time step.

    Args:
        x_t: tensor of token ids for the current step; it is reshaped to one
            id per row before the embedding lookup.
        h_t: previous hidden state; it is concatenated with the embedding
            along the last axis.

    Returns:
        Tuple ``(output_probas, h_next)``: the softmax distribution over the
        next token and the new hidden state.
    """
    # Look up one embedding per id, then drop the length-1 axis added by the reshape.
    token_column = tf.reshape(x_t, [-1, 1])
    x_t_emb = embed_x(token_column)[:, 0]

    # Join the embedding and the previous state into a single input vector.
    x_and_h = tf.concat([x_t_emb, h_t], axis=-1)

    # New hidden state: affine transform followed by tanh.
    pre_activation = tf.matmul(x_and_h, weights_0) + b_0
    h_next = tf.nn.tanh(pre_activation)

    # Distribution over the next token, P(x_next | h_next).
    token_scores = tf.matmul(h_next, weights_1) + b_1
    output_probas = tf.nn.softmax(token_scores)

    return output_probas, h_next

In [None]:
# Batch of token ids laid out as one row per time step and one column per sequence.
input_sequence = tf.placeholder('int32', (MAX_LENGTH, None))
batch_size = tf.shape(input_sequence)[1]

# Initial hidden state: all zeros, one row per sequence in the batch.
h_prev = tf.zeros([batch_size, rnn_num_units])

# Unroll the recurrence over every time step, collecting the per-step distributions.
predicted_probas = []
for t in range(MAX_LENGTH):
    x_t = input_sequence[t]
    probas_next, h_next = rnn_one_step(x_t, h_prev)
    predicted_probas.append(probas_next)
    h_prev = h_next  # the new state feeds the next step

# Stack the list of per-step tensors into a single tensor with time as the first axis.
predicted_probas = tf.stack(predicted_probas)

In [None]:
# Distributions predicted at every step except the last, flattened to one row
# per (step, sequence) pair.
predictions_matrix = tf.reshape(predicted_probas[:-1], [-1, len(tokens)])

# Targets: the token that follows each of those steps, one-hot encoded.
next_token_ids = tf.reshape(input_sequence[1:], [-1])
answers_matrix = tf.one_hot(next_token_ids, n_tokens)

In [None]:
# L2 penalty on both weight matrices (biases are not regularized).
reg_loss = 0.01 * (tf.nn.l2_loss(weights_0) + tf.nn.l2_loss(weights_1))

# predictions_matrix already holds softmax probabilities, while
# tf.losses.softmax_cross_entropy applies softmax to its `logits` argument.
# Passing the probabilities directly would apply softmax twice and flatten the
# distribution. Log-probabilities are valid logits, since softmax(log p) == p
# when p sums to one. The clip keeps log() away from zero.
log_probas = tf.log(tf.clip_by_value(predictions_matrix, 1e-10, 1.0))

# The loss function already reduces to a scalar, so no extra reduce_mean is needed.
cross_entropy = tf.losses.softmax_cross_entropy(onehot_labels=answers_matrix, logits=log_probas)
loss = cross_entropy + reg_loss

optimize = tf.train.AdamOptimizer().minimize(loss)

In [None]:
from IPython.display import clear_output
from random import sample
# Use the session provided by the Keras backend and initialize all global variables in it.
s = keras.backend.get_session()
init_op = tf.global_variables_initializer()
s.run(init_op)

# Loss value of every training step, in order.
history = []

In [None]:
# Train for 8000 steps on random batches of 64 names each.
for i in range(8000):
    batch = to_matrix(sample(names, 64), max_len=MAX_LENGTH)
    loss_i, _ = s.run([loss, optimize], feed_dict={input_sequence: batch})
    history.append(loss_i)

    # Redraw the loss curve only on every 100th step.
    if i % 100 != 99:
        continue
    clear_output(True)
    plt.plot(history, label='loss')
    plt.legend()
    plt.show()

# The average loss over the last 10 steps must be below that of the first 10.
early_loss = np.mean(history[:10])
late_loss = np.mean(history[-10:])
assert early_loss > late_loss, "RNN didn't converge."