In [1]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

In [2]:
with open('anna.txt','r') as f:
    text=f.read()
vocab = set(text)
vocab_to_int = {c:i for i,c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text],dtype=np.int32)

In [None]:
def get_batches(arr,n_seqs,n_steps):
    '''
    对已有的数组进行mini-batch分割
    
    arr：待分割的数组
    n_seqs:一个batch中序列个数
    n_steps：单个序列包含的字符数目
    '''
    batch_size = n_seqs*n_steps
    n_batches = int(len(arr)/batch_size)
    print('n_batches:',n_batches)
    #这里我们仅保留完整的batch，对于不能整除的部分进行舍弃
    arr = arr[:int(batch_size * n_batches)]
    
    #reconstruct
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0,arr.shape[1],n_steps):
        #inputs
        x = arr[:,n:n+n_steps]
        #targets
        y = np.zeros_like(x)
        y[:,:-1],y[:,-1] = x[:,1:],x[:,0]
        yield x,y
    #上面的代码定义了一个generator，调用函数会返回一个generator对象，我们可以获取一个batch

In [20]:
batches = get_batches(encoded,10,50)
x,y = next(batches)

3970


In [21]:
print('x\n',x[:10,:10])
print('\ny\n',y[:10,:10])

x
 [[45 61 47 27 19  6 28 77 20 72]
 [77 47 53 77 39 43 19 77 41 43]
 [81  1 39  8 72 72 69 23  6 50]
 [39 77 82 31 28  1 39 41 77 61]
 [77  1 19 77  1 50 62 77 50  1]
 [77 22 19 77 16 47 50 72 43 39]
 [61  6 39 77 10 43 53  6 77 15]
 [ 3 77  9 31 19 77 39 43 16 77]
 [19 77  1 50 39 56 19  8 77 36]
 [77 50 47  1 82 77 19 43 77 61]]

y
 [[61 47 27 19  6 28 77 20 72 72]
 [47 53 77 39 43 19 77 41 43  1]
 [ 1 39  8 72 72 69 23  6 50 62]
 [77 82 31 28  1 39 41 77 61  1]
 [ 1 19 77  1 50 62 77 50  1 28]
 [22 19 77 16 47 50 72 43 39  2]
 [ 6 39 77 10 43 53  6 77 15 43]
 [77  9 31 19 77 39 43 16 77 50]
 [77  1 50 39 56 19  8 77 36 61]
 [50 47  1 82 77 19 43 77 61  6]]


In [23]:
x.shape

(10, 50)

In [24]:
def build_inputs(num_seqs,num_steps):
    '''
    构建输入层
    num_seqs:每个batch中序列的个数
    num_steps:每个序列包含的字符数
    '''
    inputs = tf.placeholder(tf.int32,shape=(num_seqs,num_steps),name='inputs')
    targets = tf.placeholder(tf.int32,shape=(num_seqs,num_steps),name='targets')
    
    #add keep_prob
    keep_prob = tf.placeholder(tf.float32,name='keep_prob')
    
    return inputs, targets, keep_prob

In [48]:
# LSTM层
def build_lstm(lstm_size,num_layers,batch_size,keep_prob):
    '''
    构建lstm层
    keep_prob
    lstm_size:lstm隐层中结点数目
    num_layers:lstm的隐层数目
    batch_size:batch_size
    '''
    #构建一个基本lstm单元
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    
    #add dropout
    drop = tf.nn.rnn_cell.DropoutWrapper(lstm,output_keep_prob=keep_prob)
    
    #stack
    
    '''
    stacked_rnn = []
    for iiLyr in range(3):
        stacked_rnn.append(tf.nn.rnn_cell.LSTMCell(num_units=512, state_is_tuple=True))
    MultiLyr_cell = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn, state_is_tuple=True)
    '''
    
    #cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])#这里就是把drop层复制了num_layers层
    #initial_state = cell.zero_state(batch_size,tf.float32)
    
    stacked_rnn = []
    for iiLyr in range(num_layers):
        lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
        drop = tf.nn.rnn_cell.DropoutWrapper(lstm,output_keep_prob=keep_prob)
        stacked_rnn.append(drop)
    cell = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn,state_is_tuple=True)
    initial_state = cell.zero_state(batch_size,tf.float32)
    

    return cell,initial_state

In [56]:
# output layer
def build_output(lstm_output,in_size,out_size):
    '''
    构造输出层
    
    lstm_output:lstm层输出的结果 (是一个三维数组)
    in_size: lstm输出层重塑后的size
    out_size：softmax层的size
    '''
    
    #将lstm的输出按照列concate，例如[[1,2,3],[7,8,9]],
    #tf.concat的结果是[1,2,3,7,8,9]
    seq_output = tf.concat(lstm_output, axis=1) #tf.concat(concat_dim,values)
    #reshape
    x = tf.reshape(seq_output,[-1,in_size])
    
    #将lstm层与softmax层全链接
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal([in_size,out_size],stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))
    
    # calculate logits
    logits = tf.matmul(x,softmax_w) + softmax_b
    
    #softmax层返回概率分布
    out = tf.nn.softmax(logits,name='predictions')
    
    return out,logits

In [60]:
# 训练误差计算
def build_loss(logits,targets,lstm_size,num_classes):
    '''
    根据logits和targets计算损失
    
    logits:全链接层的输出结果
    targets:targets
    lstm_size
    num_classes: vocab_size
    '''
    
    #one_hot encode
    y_one_hot = tf.one_hot(targets,num_classes)
    y_reshaped = tf.reshape(y_one_hot,logits.get_shape())
    
    #softmax cross entropy loss
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    
    return loss

In [31]:
def build_optimizer(loss,learning_rate,grad_clip):
    '''
    构造Optimizer
    
    loss:损失
    learning_rate:学习率
    '''
    
    #use clipping gradients
    tvars = tf.trainable_variables()
    grads,_ = tf.clip_by_global_norm(tf.gradients(loss,tvars),grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads,tvars))
    
    return optimizer

In [58]:
class CharRNN:
    def __init__(self,num_classes,batch_size=64,num_steps=50,
                lstm_size=128,num_layers=2,learning_rate=0.001,
                grad_clip=5,sampling=False):
        
        if sampling == True:
            batch_size,num_steps = 1,1
        else:
            batch_size,num_steps = batch_size,num_steps
            
        tf.reset_default_graph()
        
        #input layer
        self.inputs,self.targets,self.keep_prob = build_inputs(batch_size,num_steps)
        
        #lstm layer
        cell,self.initial_state = build_lstm(lstm_size,num_layers,batch_size,self.keep_prob)
        
        # 对输入进行one_hot编码
        x_one_hot = tf.one_hot(self.inputs,num_classes)
        
        # run rnn
        #outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state
        
        # predict result
        self.prediction, self.logits = build_output(outputs,lstm_size,num_classes)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss,learning_rate,grad_clip)

In [33]:
batch_size = 100
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate =0.001
keep_prob = 0.5

In [66]:
epoches = 2
# save the variables every n epoch
save_every_n = 20

model = CharRNN(len(vocab),batch_size=batch_size,num_steps=num_steps,
               lstm_size=lstm_size,num_layers=num_layers,
               learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    counter = 0
    for e in range(epoches):
        # train netword
        new_state = sess.run(model.initial_state)
        loss = 0
        for x,y in get_batches(encoded,batch_size,num_steps):
            print('epoch:',e,'\n','counter:',counter)
            counter += 1
            start = time.time()
            feed = {model.inputs:x,
                   model.targets:y,
                   model.keep_prob:keep_prob,
                   model.initial_state:new_state}
            batch_loss,new_state,_ = sess.run([model.loss,model.final_state,model.optimizer],feed_dict=feed)
            end = time.time()
            
            #control the print lines
            if counter % 100 == 0:
                print('轮数: {}/{}... '.format(e+1, epoches),
                      '训练步数: {}... '.format(counter),
                      '训练误差: {:.4f}... '.format(batch_loss),
                      '{:.4f} sec/batch'.format((end-start)))
            
            if counter % save_every_n == 0:
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

198
epoch: 0 
 counter: 0
epoch: 0 
 counter: 1
epoch: 0 
 counter: 2
epoch: 0 
 counter: 3
epoch: 0 
 counter: 4
epoch: 0 
 counter: 5
epoch: 0 
 counter: 6
epoch: 0 
 counter: 7
epoch: 0 
 counter: 8
epoch: 0 
 counter: 9
epoch: 0 
 counter: 10
epoch: 0 
 counter: 11
epoch: 0 
 counter: 12
epoch: 0 
 counter: 13
epoch: 0 
 counter: 14
epoch: 0 
 counter: 15
epoch: 0 
 counter: 16
epoch: 0 
 counter: 17
epoch: 0 
 counter: 18
epoch: 0 
 counter: 19
epoch: 0 
 counter: 20
epoch: 0 
 counter: 21
epoch: 0 
 counter: 22
epoch: 0 
 counter: 23
epoch: 0 
 counter: 24
epoch: 0 
 counter: 25
epoch: 0 
 counter: 26
epoch: 0 
 counter: 27
epoch: 0 
 counter: 28
epoch: 0 
 counter: 29
epoch: 0 
 counter: 30
epoch: 0 
 counter: 31
epoch: 0 
 counter: 32
epoch: 0 
 counter: 33
epoch: 0 
 counter: 34
epoch: 0 
 counter: 35
epoch: 0 
 counter: 36
epoch: 0 
 counter: 37
epoch: 0 
 counter: 38
epoch: 0 
 counter: 39
epoch: 0 
 counter: 40
epoch: 0 
 counter: 41
epoch: 0 
 counter: 42
epoch: 0 
 counte

epoch: 1 
 counter: 339
epoch: 1 
 counter: 340
epoch: 1 
 counter: 341
epoch: 1 
 counter: 342
epoch: 1 
 counter: 343
epoch: 1 
 counter: 344
epoch: 1 
 counter: 345
epoch: 1 
 counter: 346
epoch: 1 
 counter: 347
epoch: 1 
 counter: 348
epoch: 1 
 counter: 349
epoch: 1 
 counter: 350
epoch: 1 
 counter: 351
epoch: 1 
 counter: 352
epoch: 1 
 counter: 353
epoch: 1 
 counter: 354
epoch: 1 
 counter: 355
epoch: 1 
 counter: 356
epoch: 1 
 counter: 357
epoch: 1 
 counter: 358
epoch: 1 
 counter: 359
epoch: 1 
 counter: 360
epoch: 1 
 counter: 361
epoch: 1 
 counter: 362
epoch: 1 
 counter: 363
epoch: 1 
 counter: 364
epoch: 1 
 counter: 365
epoch: 1 
 counter: 366
epoch: 1 
 counter: 367
epoch: 1 
 counter: 368
epoch: 1 
 counter: 369
epoch: 1 
 counter: 370
epoch: 1 
 counter: 371
epoch: 1 
 counter: 372
epoch: 1 
 counter: 373
epoch: 1 
 counter: 374
epoch: 1 
 counter: 375
epoch: 1 
 counter: 376
epoch: 1 
 counter: 377
epoch: 1 
 counter: 378
epoch: 1 
 counter: 379
epoch: 1 
 count

In [68]:
tf.train.get_checkpoint_state('checkpoints')

model_checkpoint_path: "checkpoints/i380_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i20_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i40_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i60_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i80_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i100_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i120_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i140_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i160_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i180_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i220_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i240_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i260_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i280_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i300_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i320_l512.ckpt"
all_model_checkpoint_paths: "checkpoints

In [69]:
def pick_top_n(preds,vocab_size,top_n=5):
    '''
    从预测结果中选取前top_n个最可能的字符
    
    preds:预测结果
    vocab_size
    top_n
    '''
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    
    p = p / np.sum(p)
    c = np.random.choice(vocab_size,1,p=p)[0]
    return c

In [79]:
def sample(checkpoint,n_samples,lstm_size,vocab_size,prime='The'):
    '''
    generate new text
    
    checkpoint: the parameters generated by some epoch
    n_sample: the lenght of the text to be generated
    lstm_size: the number of hidden units
    vocab_size
    prime: the start vocabulary of the text
    '''
    
    samples = [c for c in prime]
    # sampling=Ture 意味着batch的size=1x1
    model = CharRNN(len(vocab),lstm_size=lstm_size,sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        #加载模型，恢复训练
        saver.restore(sess,checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1,1))
            # input a single char
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs:x,model.keep_prob:1.,
                   model.initial_state:new_state}
            preds,new_state = sess.run([model.prediction,model.final_state],
                                      feed_dict=feed)
            
        c = pick_top_n(preds,len(vocab))
        # add char to samples
        samples.append(int_to_vocab[c])
        
        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs:x,model.keep_prob:1,model.initial_state:new_state}
            preds,new_state = sess.run([model.prediction,model.final_state],feed_dict=feed)
            
            c = pick_top_n(preds,len(vocab))
            samples.append(int_to_vocab[c])
            
        return ''.join(samples)    
            

In [71]:
tf.train.latest_checkpoint('checkpoints')

'checkpoints/i380_l512.ckpt'

In [72]:
checkpoint = tf.train.latest_checkpoint('checkpoints')

In [80]:
samp = sample(checkpoint,2000,lstm_size,len(vocab),prime='The')

INFO:tensorflow:Restoring parameters from checkpoints/i380_l512.ckpt


In [81]:
samp

'Ther\nelse, asd santting hid and wentent of\nthe cerpuned to the somered, to he saliding of the had hed and the casd torther," she had sell wish and at he cald on him sere that to the tho cas out ho warl whot a dover his war of thit some then wat her sald the\nsaid out the celpase taring hevored his\nwentere wish wore sat and tour the cous at tom hered and sourt in a derang on hes it he wan worle thate her all som, and wint whit the hind and heving to hem. "Wher, she\nwast it of\nsher the her as time the prise sad ithe she with to mast he was as the pesese fas hemsers, and witl thourtinn thet the wose whot the\nsind, ther the casse of it an the posed tho carter and his to had hor sithed the sontint\nwal wat it the she cunsed, hind the persterte ale he maditing on homes an his anded the soling has the wald."\n\n"Yet sime to sha chas or the piled to to he somer, the pasting, as the wout tit\non waser out of thoushed that\nwat and the sam that thet hom ta her here shand wesh whis ham har