### 载入所需库和读取数据

In [1]:
import time
from collections import namedtuple
import numpy as np
import tensorflow as tf

In [None]:
set('abc') # evaluates to {'a', 'b', 'c'}: the distinct characters of the string (display order may vary)

In [2]:
import os
# NOTE: hard-coded, machine-specific dataset directory; adjust for your environment.
os.chdir('C:/test/tensorflow/Dataset')
with open('anna.txt') as file:
    text = file.read()
# Vocabulary: every distinct character of the corpus, sorted so the
# character -> id mapping is the same on every run.
vocab = sorted(set(text))
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
# The corpus as an array of integer character ids. The ids are fed to
# tf.int32 placeholders and tf.one_hot, so store them as integers
# (the original float32 dtype relied on an implicit cast at feed time).
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

### Making training mini-batches

In [31]:
def get_batches(arr, batch_size, n_steps):
    """Yield (x, y) mini-batches of shape (batch_size, n_steps) from `arr`.

    `arr` is a 1-D numpy array. It is truncated to a whole number of
    batches and reshaped to `batch_size` rows; each yielded `x` is a
    window of `n_steps` consecutive columns and `y` is the same window
    shifted one column to the right (the next-character targets).

    For the final window no column exists to the right of its last
    position, so the last column of `y` is left as zeros.
    """
    # Keep only as many elements as fill complete batches.
    window = batch_size * n_steps
    full_batches = len(arr) // window
    grid = arr[:full_batches * window].reshape((batch_size, -1))

    # Walk the columns in strides of n_steps.
    for start in range(0, grid.shape[1], n_steps):
        stop = start + n_steps
        x = grid[:, start:stop]
        # The shifted slice is one column short for the last window;
        # copy what exists into a zero-initialised array of x's shape.
        shifted = grid[:, start + 1:stop + 1]
        y = np.zeros(x.shape, dtype=x.dtype)
        y[:, :shifted.shape[1]] = shifted
        yield x, y

### Building the model
#### inputs

In [32]:
def build_inputs(batch_size, num_steps):
    """Create the graph's input placeholders.

    Returns a tuple `(inputs, targets, keep_prob)`:
    - `inputs` and `targets` are int32 placeholders of shape
      [batch_size, num_steps], named 'inputs' and 'targets';
    - `keep_prob` is a float32 placeholder named 'keep' with no fixed shape.
    """
    batch_shape = [batch_size, num_steps]
    inputs = tf.placeholder(tf.int32, batch_shape, name='inputs')
    targets = tf.placeholder(tf.int32, batch_shape, name='targets')
    keep_prob = tf.placeholder(tf.float32, name='keep')
    return inputs, targets, keep_prob

#### LSTM Cell
- create a basic LSTM cell with `lstm = tf.contrib.rnn.BasicLSTMCell(num_units)`
- add dropout by wrapping it with
`tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)`
- stack up the LSTM cells into layers with `tf.contrib.rnn.MultiRNNCell`.
- create an initial cell state of all zeros. 
`initial_state = cell.zero_state(batch_size, tf.float32)`

In [33]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    """Build a stacked LSTM cell with dropout on each layer's output.

    Args:
        lstm_size: number of units passed to each BasicLSTMCell.
        num_layers: how many dropout-wrapped LSTM cells to stack.
        batch_size: batch size used to create the zero initial state.
        keep_prob: output keep probability given to each DropoutWrapper.

    Returns:
        `(cell, initial_state)`: the MultiRNNCell and its all-zero
        float32 initial state.
    """
    layers = []
    for _ in range(num_layers):
        # A fresh cell per layer, each wrapped with output dropout.
        base_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        layers.append(
            tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob))
    cell = tf.contrib.rnn.MultiRNNCell(layers)
    initial_state = cell.zero_state(batch_size, tf.float32)
    return cell, initial_state

#### RNN output
- output shape: N * M * L，其中 L 为 hidden units 的数量；为了方便后面的全连接层，将其 reshape 成 (N * M) * L

In [34]:
def build_output(lstm_output, in_size, out_size):
    """Apply a fully connected softmax layer to the RNN output.

    Args:
        lstm_output: output of the RNN (a tensor, or a list of tensors
            that are concatenated along axis 1).
            NOTE(review): presumably shaped (batch, steps, in_size) -- confirm.
        in_size: size of the last dimension of the flattened RNN output;
            also the number of rows of the weight matrix.
        out_size: number of output classes (columns of the weight matrix).

    Returns:
        `(out, logits)`: the softmax of the logits (op named
        'predictions') and the raw logits, each with one row per row of
        the flattened input.
    """
    # Flatten to 2-D so that every row has in_size features.
    flat = tf.reshape(tf.concat(lstm_output, axis=1), [-1, in_size])
    with tf.variable_scope('softmax'):
        weights = tf.Variable(
            tf.truncated_normal((in_size, out_size), stddev=0.1))
        bias = tf.Variable(tf.zeros(out_size))
        logits = tf.matmul(flat, weights) + bias
        predictions = tf.nn.softmax(logits, name='predictions')
    return predictions, logits

#### Training Loss

In [35]:
def build_loss(logits, targets, lstm_size, num_class):
    """Return the mean softmax cross-entropy between logits and targets.

    Args:
        logits: 2-D logits tensor; its static shape must be fully known
            because the one-hot labels are reshaped to it.
        targets: integer class ids, one-hot encoded to `num_class` classes.
        lstm_size: not used by this function (kept in the signature).
        num_class: depth of the one-hot encoding.
    """
    # One-hot encode the targets, then flatten them to line up with logits.
    labels = tf.reshape(tf.one_hot(targets, num_class), logits.get_shape())
    per_example = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(per_example)

#### Optimizer
- Plain RNNs suffer from exploding and vanishing gradients; to counter the exploding case, clip the gradients so that their global norm stays below some threshold.

In [36]:
def build_optimizer(loss, learning_rate, grad_clip):
    """Build the training op: Adam applied to globally clipped gradients.

    Args:
        loss: scalar loss tensor to differentiate.
        learning_rate: learning rate passed to AdamOptimizer.
        grad_clip: clip norm passed to tf.clip_by_global_norm.

    Returns:
        The op produced by `apply_gradients` over all trainable variables.
    """
    params = tf.trainable_variables()
    raw_grads = tf.gradients(loss, params)
    # Rescale the gradients together so their global norm is at most grad_clip.
    clipped, _ = tf.clip_by_global_norm(raw_grads, grad_clip)
    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped, params))

#### Build the network

In [37]:
class CharRNN:
    """Character-level RNN graph: inputs, stacked LSTM, softmax, loss, optimizer.

    Constructing an instance resets the default TensorFlow graph and
    builds the whole model into it. The resulting tensors/ops are exposed
    as attributes: `inputs`, `targets`, `keep_prob`, `initial_state`,
    `final_state`, `prediction`, `logits`, `loss` and `optimizer`.
    """

    def __init__(self, num_classes, batch_size=64,
                 num_steps=50, lstm_size=128, num_layers=2,
                 learning_rate=0.001, grad_clip=5, sampling=False):
        """Build the graph.

        Args:
            num_classes: number of distinct characters (one-hot depth and
                size of the softmax output).
            batch_size: sequences per batch.
            num_steps: sequence steps per batch.
            lstm_size: units per LSTM layer.
            num_layers: number of stacked LSTM layers.
            learning_rate: learning rate for the optimizer.
            grad_clip: global-norm threshold for gradient clipping.
            sampling: when true, the graph is built for one character at
                a time (batch_size and num_steps are both forced to 1).
        """
        # Sampling feeds a single character per step, so override the sizes.
        if sampling:
            batch_size, num_steps = 1, 1

        # Start from an empty graph so repeated construction does not
        # accumulate ops from earlier models.
        tf.reset_default_graph()

        self.inputs, self.targets, self.keep_prob = build_inputs(
            batch_size, num_steps)

        cell, self.initial_state = build_lstm(
            lstm_size, num_layers, batch_size, self.keep_prob)

        # Run the one-hot encoded characters through the stacked LSTM.
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        outputs, state = tf.nn.dynamic_rnn(
            cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        self.prediction, self.logits = build_output(
            outputs, lstm_size, num_classes)

        self.loss = build_loss(
            self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(
            self.loss, learning_rate, grad_clip)

### Time for training

In [38]:
# Hyperparameters used to build and train the model.
batch_size = 100        # Sequences per batch
num_steps = 100         # Number of sequence steps per batch
lstm_size = 512         # Size of hidden layers in LSTMs
num_layers = 2          # Number of LSTM layers
learning_rate = 0.001   # Learning rate
keep_prob = 0.5         # Dropout keep probability fed to the model during training

In [21]:
import os

epochs = 20
print_every_n = 50      # report the training loss every N steps
save_every_n = 200      # write a checkpoint every N steps
model = CharRNN(len(vocab), batch_size=batch_size,
                num_steps=num_steps, lstm_size=lstm_size,
                num_layers=num_layers, learning_rate=learning_rate)
saver = tf.train.Saver(max_to_keep=100)
# Make sure the checkpoint directory exists before the first save.
os.makedirs('checkpoints', exist_ok=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    counter = 0
    for e in range(epochs):
        # Each epoch starts from the zero state; within an epoch the final
        # state of one batch is fed as the initial state of the next.
        new_state = sess.run(model.initial_state)
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            feed = {model.inputs: x, model.targets: y,
                    model.keep_prob: keep_prob, model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run(
                [model.loss, model.final_state, model.optimizer],
                feed_dict=feed)
            if counter % print_every_n == 0:
                print('Epoch: {}/{}... '.format(e + 1, epochs),
                      'Training Step: {}... '.format(counter),
                      'Training loss: {:.4f}'.format(batch_loss))
            if counter % save_every_n == 0:
                # NOTE(review): this name lacks the 'l' prefix used by the
                # final checkpoint below; kept as-is in case other cells
                # refer to these exact file names.
                saver.save(sess, 'checkpoints/i{}_{}.ckpt'.format(
                    counter, lstm_size))
    # Final checkpoint. This must run inside the `with` block: once the
    # block exits the session is closed and can no longer be saved.
    saver.save(sess, 'checkpoints/i{}_l{}'.format(counter, lstm_size))

(2, 2, 2)

In [22]:
c.shape # NOTE(review): `c` is not defined in the cells shown here -- looks like a leftover scratch cell; confirm or remove

(2, 4)