In [2]:
import numpy as np
import tensorflow as tf
import urllib

In [5]:
def reset_tf():
    """Start over with an empty default graph and a brand-new interactive session.

    Closes the session currently bound to the module-level name ``sess`` (if
    there is one), resets TensorFlow's default graph, and rebinds ``sess`` to a
    new ``tf.InteractiveSession`` created with ``log_device_placement=True``.

    Safe to call on a fresh kernel: when ``sess`` has not been created yet (or
    is ``None``) the close step is skipped instead of raising ``NameError``.
    """
    global sess

    # Look the name up defensively so the very first call does not require
    # a session to have been created by another cell beforehand.
    previous = globals().get('sess')
    if previous is not None:
        previous.close()

    tf.reset_default_graph()
    sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))

In [6]:
# Notebook-wide interactive session, created with log_device_placement=True.
# reset_tf() closes this session and rebinds `sess` to a fresh one.
sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))

In [8]:
# Rebuild everything from scratch: new default graph, new session.
reset_tf()

# Hyperparameters (other cells read batch_size and seq_length).
seq_length = 50
batch_size = 64
embedding_size = 64
hidden_size = 64
vocab_size = 256
num_layers = 2

# Integer token ids for the inputs and their targets, plus the number of
# valid steps in each row (passed to dynamic_rnn as sequence_length).
input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
input_lengths = tf.placeholder(tf.int32, [batch_size])
target_data = tf.placeholder(tf.int32, [batch_size, seq_length])

# Stack of num_layers GRU cells, each with hidden_size units.
rnn_cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.GRUCell(hidden_size) for _ in range(num_layers)])

# Zero state of the stacked cell. Other cells feed values for this structure
# to carry the recurrent state from one run to the next.
initial_states = rnn_cell.zero_state(batch_size, tf.float32)

# Embedding table: one embedding_size-wide row per token id.
embedding = tf.get_variable('embedding', [vocab_size, embedding_size])
embedded_inputs = tf.nn.embedding_lookup(embedding, input_data)

# Output projection from the RNN hidden size to vocabulary logits.
softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
softmax_b = tf.get_variable("softmax_b", [vocab_size])

outputs, final_states = tf.nn.dynamic_rnn(rnn_cell,
                                          embedded_inputs,
                                          initial_state=initial_states,
                                          sequence_length=input_lengths)

# Flatten the batch and time axes so the projection is a single matmul.
flat_outputs = tf.reshape(outputs, [-1, hidden_size])
flat_targets = tf.reshape(target_data, [-1])

flat_output_logits = tf.matmul(flat_outputs, softmax_w) + softmax_b
flat_output_probs = tf.nn.softmax(flat_output_logits)

# Mask is 0 where the target id is 0 (the value generate_batches pads with) and
# 1 for positive ids, so padded positions contribute no loss.
# tf.cast replaces the deprecated tf.to_float; the result is identical.
flat_loss_mask = tf.sign(tf.cast(flat_targets, tf.float32))
flat_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=flat_output_logits, labels=flat_targets) * flat_loss_mask

# mean_loss drives the optimizer; total_loss is what the training loop accumulates.
# NOTE(review): reduce_mean divides by every position, masked ones included.
mean_loss = tf.reduce_mean(flat_losses)
total_loss = tf.reduce_sum(flat_losses)

# Adam with gradients clipped to a global norm of 5.0.
optimizer = tf.train.AdamOptimizer(1e-3)
gradients, variables = zip(*optimizer.compute_gradients(mean_loss))
gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
train_op = optimizer.apply_gradients(zip(gradients, variables))

sess.run(tf.global_variables_initializer())

In [5]:
def sample(initial_data, count):
    """Generate ``count`` token ids from the model, one step at a time.

    Starts from the token id ``initial_data``. On every step the most recent
    token is written to position ``[0, 0]`` of an otherwise all-zero input
    batch, the graph is run with the recurrent state returned by the previous
    step, and the next token is drawn at random from the first row of
    ``flat_output_probs``.

    Args:
        initial_data: token id used as the first input.
        count: number of tokens to draw.

    Returns:
        A list of ``count + 1`` token ids, beginning with ``initial_data``.
    """
    # Only batch row 0 is active (length 1); every other row has length 0.
    step_lengths = [1] + [0] * (batch_size - 1)
    step_inputs = np.zeros(input_data.shape)
    state = sess.run(initial_states)

    tokens = [initial_data]
    for _ in range(count):
        step_inputs[0, 0] = tokens[-1]
        feed = {
            input_data: step_inputs,
            input_lengths: step_lengths,
            initial_states: state,
        }
        probs, state = sess.run((flat_output_probs, final_states), feed_dict=feed)

        # Row 0 of the flattened probabilities is batch row 0, time step 0.
        next_token_probs = probs[0]
        tokens.append(np.random.choice(len(next_token_probs), p=next_token_probs))

    return tokens

In [6]:
def generate_batches(array, batch_size, seq_length):
    """Yield ``(batch, lengths)`` pairs that cut ``array`` into fixed-size batches.

    ``array`` is split into consecutive chunks of ``seq_length`` items. Row
    ``j`` of the ``i``-th batch holds chunk ``j * num_batches + i``, so a given
    row continues where the same row of the previous batch stopped. Chunks that
    run past the end of ``array`` are right-padded with zeros.

    Args:
        array: 1-D sequence to split.
        batch_size: number of rows in every yielded batch.
        seq_length: number of columns in every yielded batch.

    Yields:
        A tuple ``(batch, lengths)`` where ``batch`` is the stacked array of
        ``batch_size`` padded rows and ``lengths`` is a list with the unpadded
        length of each row.
    """
    # Ceiling divisions: chunks needed for the data, then batches needed for the chunks.
    num_chunks = (len(array) + seq_length - 1) // seq_length
    num_batches = (num_chunks + batch_size - 1) // batch_size

    for batch_idx in range(num_batches):
        starts = [(row * num_batches + batch_idx) * seq_length for row in range(batch_size)]
        chunks = [array[start:start + seq_length] for start in starts]
        lengths = [len(chunk) for chunk in chunks]
        padded = [
            np.pad(chunk, (0, seq_length - len(chunk)), 'constant', constant_values=0)
            for chunk in chunks
        ]
        yield np.stack(padded), lengths

In [7]:
# Import the submodule explicitly: a bare `import urllib` does not guarantee that
# `urllib.request` is loaded, so the call below would otherwise only work when
# some other package happened to import it first.
import urllib.request

# Download the training corpus and decode it as UTF-8.
with urllib.request.urlopen('http://textfiles.com/stories/13chil.txt') as response:
    train_text = response.read().decode("utf-8")

# Collapse every run of whitespace (newlines included) into a single space.
train_text = ' '.join(train_text.split())

In [10]:
# Encode the corpus as one integer code point per character.
train_array = np.array([ord(ch) for ch in train_text])

for epoch in range(10000):
    # Each epoch starts from the zero state; within an epoch the state returned
    # by one batch is fed into the next.
    state = sess.run(initial_states)
    epoch_loss = 0.0

    # Targets are the inputs shifted one character ahead.
    batch_pairs = zip(
        generate_batches(train_array[:-1], batch_size, seq_length),
        generate_batches(train_array[1:], batch_size, seq_length),
    )

    for (batch_inputs, batch_lengths), (batch_targets, _) in batch_pairs:
        _, batch_loss, state = sess.run(
            (train_op, total_loss, final_states),
            feed_dict={
                input_data: batch_inputs,
                input_lengths: batch_lengths,
                target_data: batch_targets,
                initial_states: state,
            },
        )
        epoch_loss += batch_loss

    # Average the summed loss over the number of (input, target) pairs.
    epoch_loss /= len(train_array) - 1

    if epoch % 50 == 0:
        print(f'epoch {epoch}: loss={epoch_loss}')


epoch 0: loss=0.37350821203115037
epoch 50: loss=0.029955742310504525
epoch 100: loss=0.02422499413392982
epoch 150: loss=0.021637911699256118
epoch 200: loss=0.019935383115495954
epoch 250: loss=0.018712652459436534
epoch 300: loss=0.01770838961309316
epoch 350: loss=0.016854421459898655
epoch 400: loss=0.01610479111574134
epoch 450: loss=0.015439863350926614
epoch 500: loss=0.014835039693482068
epoch 550: loss=0.014362079634958384
epoch 600: loss=0.01379976284747221
epoch 650: loss=0.013394939169591787
epoch 700: loss=0.02729761065269003
epoch 750: loss=0.018836065944360228
epoch 800: loss=0.01647875381975758
epoch 850: loss=0.015144021900332703
epoch 900: loss=0.014278357126274887
epoch 950: loss=0.0136388610820381
epoch 1000: loss=0.01314244878535368
epoch 1050: loss=0.012734600840782633
epoch 1100: loss=0.012659464928568625
epoch 1150: loss=0.012109175263618936
epoch 1200: loss=0.011830008394864141
epoch 1250: loss=0.011583019762623067
epoch 1300: loss=0.011363625648070355
epoch 1

In [16]:
# Draw 500 tokens starting from 'M' and turn the token ids back into characters.
''.join(map(chr, sample(ord('M'), 500)))

'M yound and string became suspicious and cotther toked on end with fright. Oh, how foolish she had been! Her greed had trapped her. If only she had stayed home to a great bedall of the hont at all fright us off it chas suthing. "Well, firs quilllhing to the worhisg and pad the quird, Mrs. Hedgehog became suspicious and cried, "How long before we reach that field of grasshoppers?" "Why, you silly a hake among as the musinly he stopped digging, and threw back his ears to listen. Then he quickly jum'

In [202]:
# Static shape of the `outputs` tensor returned by tf.nn.dynamic_rnn.
# NOTE(review): the saved output for this cell ends in Dimension(32), which does not
# match hidden_size = 64 in the model cell; it looks stale. Re-run to confirm.
outputs.shape

TensorShape([Dimension(64), Dimension(50), Dimension(32)])

In [3]:
# Scratch check: a boolean array and a float array of the same length.
x = np.array([True, True, False])
y = np.array([1., 2., 3.])

In [4]:
# Element-wise product: True/False act as 1/0, so the False position is zeroed.
x*y

array([ 1.,  2.,  0.])