In [1]:
# Parenthesised form prints the same text on Python 2 and is valid Python 3.
print('lstm')

lstm


In [2]:
import os
import time

import numpy as np
import tensorflow as tf

In [4]:
# Read the whole training corpus into memory as a single string.
with open('test_file/anna.txt', 'r') as f:
    text = f.read()

# Sort the distinct characters before numbering them. Iteration order of a
# bare set is arbitrary and may differ between interpreter sessions, which
# would silently change the char <-> id mapping and make checkpoints saved
# in one session meaningless in the next.
vocab = sorted(set(text))
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))

# The corpus as an int32 array of character ids.
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

In [80]:
# mini-batch
def get_batches(arr, n_seqs, n_steps):
    '''Yield (x, y) mini-batches cut from a 1-D array of encoded symbols.

    Arguments
    ---------
    arr: 1-D numpy array to split into batches
    n_seqs: number of rows (parallel sequences) in every batch
    n_steps: number of columns (time steps) in every batch

    Yields
    ------
    x: view of shape (n_seqs, n_steps) into the reshaped data
    y: array of the same shape holding x shifted left by one column; the
       last column wraps around to the first column of x
    '''
    batch_size = n_seqs * n_steps
    n_batches = len(arr) // batch_size
    # Drop the tail that does not fill a complete batch.
    arr = arr[:batch_size * n_batches]

    # One row per parallel sequence; consecutive windows of n_steps columns
    # form consecutive batches.
    arr = arr.reshape((n_seqs, -1))
    for n in range(0, arr.shape[1], n_steps):
        x = arr[:, n:n + n_steps]
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        # Read the wrap-around value from x, not y: y[:, 0] is a view of the
        # array being filled and would yield x[:, 1] (or 0 when n_steps == 1).
        y[:, -1] = x[:, 0]
        yield x, y

In [81]:
# input 
def build_inputs(num_seqs, num_steps):
    '''Create the placeholders that are fed on every run of the graph.

    Returns a tuple (inputs, targets, keep_prob): two int32 placeholders of
    shape (num_seqs, num_steps) and one float32 placeholder declared without
    a shape.
    '''
    batch_shape = (num_seqs, num_steps)
    return (
        tf.placeholder(tf.int32, shape=batch_shape, name='inputs'),
        tf.placeholder(tf.int32, shape=batch_shape, name='targets'),
        tf.placeholder(tf.float32, name='keep_prob'),
    )

In [82]:
# LSTM
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    '''Build a stack of dropout-wrapped LSTM cells and its zero state.

    Arguments
    ---------
    lstm_size: number of units passed to every BasicLSTMCell
    num_layers: how many cells to stack
    batch_size: batch size used to build the zero state
    keep_prob: value used as output_keep_prob of every DropoutWrapper

    Returns
    -------
    (cell, initial_state): the MultiRNNCell and its float32 zero state
    '''
    # A fresh BasicLSTMCell and DropoutWrapper are constructed for each layer.
    lstm_cells = [
        tf.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.BasicLSTMCell(lstm_size),
            output_keep_prob=keep_prob)
        for _ in range(num_layers)
    ]
    cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)
    initial_state = cell.zero_state(batch_size, tf.float32)

    return cell, initial_state

In [83]:
# output
def build_output(lstm_output, in_size, out_size):
    '''Map RNN outputs to logits and softmax probabilities.

    The input is concatenated along axis 1 and reshaped into rows of width
    in_size, then multiplied by an (in_size, out_size) weight matrix and
    offset by a bias of length out_size.

    Returns (out, logits), where out is the softmax of logits and is named
    'predictions' in the graph.
    '''
    # Collapse everything into rows of width in_size so a single matmul
    # handles all of them.
    flat = tf.reshape(tf.concat(lstm_output, 1), [-1, in_size])

    with tf.variable_scope('softmax'):
        weight_init = tf.truncated_normal([in_size, out_size], stddev=0.1)
        softmax_w = tf.Variable(weight_init)
        softmax_b = tf.Variable(tf.zeros(out_size))

    # Built outside the 'softmax' scope, exactly like the variables' consumers
    # were before.
    logits = tf.matmul(flat, softmax_w) + softmax_b
    return tf.nn.softmax(logits, name='predictions'), logits

In [84]:
#  loss
def build_loss(logits, targets, lstm_size, num_classes):
    '''Mean softmax cross-entropy between logits and one-hot encoded targets.

    targets are one-hot encoded with depth num_classes and reshaped to the
    static shape of logits. lstm_size is accepted but not used here.
    '''
    labels = tf.reshape(tf.one_hot(targets, num_classes), logits.get_shape())
    per_row = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    return tf.reduce_mean(per_row)

In [85]:
# Optimizer, using gradient clipping
def build_optimizer(loss, learning_rate, grad_clip):
    '''Return an op applying one Adam update with globally clipped gradients.

    Gradients of loss with respect to all trainable variables are clipped
    with tf.clip_by_global_norm(..., grad_clip) before being applied.
    '''
    trainable = tf.trainable_variables()
    raw_grads = tf.gradients(loss, trainable)
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)

    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped_grads, trainable))

In [86]:
class CharRNN(object):
    '''Character-level LSTM model assembled from the build_* helpers.

    Constructing an instance resets the default TensorFlow graph and builds
    a new one. The resulting tensors/ops are exposed as attributes:
    inputs, targets, keep_prob, initial_state, final_state, prediction,
    logits, loss and optimizer.
    '''

    def __init__(self, num_classes, batch_size=64, num_steps=50,
                 lstm_size=128, num_layers=2, learning_rate=0.001,
                 grad_clip=5, sampling=False):
        '''Build the graph.

        Arguments
        ---------
        num_classes: depth of the one-hot encoding for inputs and targets
        batch_size: sequences per batch (forced to 1 when sampling)
        num_steps: steps per sequence (forced to 1 when sampling)
        lstm_size: units per LSTM layer
        num_layers: number of stacked LSTM layers
        learning_rate: learning rate given to the Adam optimizer
        grad_clip: global-norm threshold for gradient clipping
        sampling: when true, build the graph for a single sequence and a
            single step at a time
        '''
        if sampling:
            # The graph is fed one symbol at a time in this mode.
            batch_size, num_steps = 1, 1

        tf.reset_default_graph()

        # Placeholders
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)
        # Stacked LSTM
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)
        # One-hot encode the integer inputs
        x_one_hot = tf.one_hot(self.inputs, num_classes)

        # Unroll the RNN over the input steps
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        # Predictions
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)

        # Loss and training op
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [87]:
# training
# Batch geometry: sequences per batch and steps per sequence.
batch_size, num_steps = 100, 100
# Network size: units per LSTM layer and number of stacked layers.
lstm_size, num_layers = 512, 2
# Optimizer step size.
learning_rate = 0.001
# Value fed to the model's keep_prob placeholder during training.
keep_prob = 0.5

In [None]:
# run
epochs = 20
# Write a checkpoint every this many training steps.
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps, lstm_size=lstm_size, num_layers=num_layers, learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)

# saver.save() does not create the target directory and fails when it is
# missing, so make sure it exists before training starts.
if not os.path.isdir('checkpoints'):
    os.makedirs('checkpoints')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Global step counter across all epochs.
    counter = 0
    for e in range(epochs):
        # Start every epoch from the zero state.
        new_state = sess.run(model.initial_state)
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {
                model.inputs: x,
                model.targets: y,
                model.keep_prob: keep_prob,
                # Carry the final state of the previous batch into this one.
                model.initial_state: new_state
            }
            batch_loss, new_state, _ = sess.run([model.loss, model.final_state, model.optimizer], feed_dict=feed)
            end = time.time()

            # Periodic progress report.
            if counter % 500 == 0:
                print('count: {}/{}, steps: {}, loss: {:.4f}, {:.4f} sec/batch'.format(e+1, epochs, counter, batch_loss, (end-start)))

            if counter % save_every_n == 0:
                saver.save(sess, 'checkpoints/i{}_1{}.ckpt'.format(counter, lstm_size))

    # Final checkpoint after the last epoch.
    saver.save(sess, 'checkpoints/i{}_1{}.ckpt'.format(counter, lstm_size))

In [73]:
# Number of distinct characters in the corpus; this is the value passed to
# CharRNN as num_classes.
len(vocab)

111