In [1]:
import time
from collections import namedtuple
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
with open('anna.txt','r') as f:
    text = f.read()
vocab = set(text)
vocab_to_int = {c:i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

In [3]:
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [4]:
encoded[:100]

array([ 5,  7,  0, 61,  1, 10, 62, 63, 23, 74, 74, 74, 64,  0, 61, 61, 35,
       63, 42,  0, 69, 28, 18, 28, 10, 25, 63,  0, 62, 10, 63,  0, 18, 18,
       63,  0, 18, 28, 29, 10, 39, 63, 10, 19, 10, 62, 35, 63, 72,  2,  7,
        0, 61, 61, 35, 63, 42,  0, 69, 28, 18, 35, 63, 28, 25, 63, 72,  2,
        7,  0, 61, 61, 35, 63, 28,  2, 63, 28,  1, 25, 63, 50, 49,  2, 74,
       49,  0, 35, 59, 74, 74,  3, 19, 10, 62, 35,  1,  7, 28,  2])

In [5]:
len(vocab)

83

In [11]:
def get_batches(arr, n_seqs, n_steps):
    characters_per_batch = n_seqs * n_steps
    n_batches = len(arr)//characters_per_batch
    
    arr = arr[:n_batches*characters_per_batch]
    arr = arr.reshape((n_seqs,-1))
    
    for n in range(0, arr.shape[1], n_steps):
        x = arr[:, n:n+n_steps]
        y = np.zeros_like(x)
        
        y[:, :-1], y[:,-1] = x[:,1:],x[:,0]
        yield x,y

In [12]:
batches = get_batches(encoded, 10, 50)
x, y = next(batches)

In [13]:
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[ 5  7  0 61  1 10 62 63 23 74]
 [63  0 69 63  2 50  1 63 81 50]
 [19 28  2 59 74 74 38 17 10 25]
 [ 2 63 76 72 62 28  2 81 63  7]
 [63 28  1 63 28 25 57 63 25 28]
 [63 32  1 63 49  0 25 74 50  2]
 [ 7 10  2 63 24 50 69 10 63 42]
 [39 63 43 72  1 63  2 50 49 63]
 [ 1 63 28 25  2  8  1 59 63 44]
 [63 25  0 28 76 63  1 50 63  7]]

y
 [[ 7  0 61  1 10 62 63 23 74 74]
 [ 0 69 63  2 50  1 63 81 50 28]
 [28  2 59 74 74 38 17 10 25 57]
 [63 76 72 62 28  2 81 63  7 28]
 [28  1 63 28 25 57 63 25 28 62]
 [32  1 63 49  0 25 74 50  2 18]
 [10  2 63 24 50 69 10 63 42 50]
 [63 43 72  1 63  2 50 49 63 25]
 [63 28 25  2  8  1 59 63 44  7]
 [25  0 28 76 63  1 50 63  7 10]]


In [14]:
def build_inputs(batch_size, num_steps):
    
    inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
    targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    return inputs, targets, keep_prob

In [15]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    
    def build_cell(lstm_size, keep_prob):
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob = keep_prob)
        return drop

    cell = tf.contrib.rnn.MultiRNNCell([build_cell(lstm_size, keep_prob) for _ in range(num_layers)])
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    return cell, initial_state

In [16]:
def build_output(lstm_output, in_size, out_size):
    
    seq_output = tf.concat(lstm_output, axis=1)
    x = tf.reshape(seq_output, [-1, in_size])
    
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))
    
    logits = tf.matmul(x,softmax_w) + softmax_b
    out = tf.nn.softmax(logits, name='predictions')
    
    return out, logits

In [17]:
def build_loss(logits, targets, lstm_size, num_classes):
    
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
    
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    return loss

In [18]:
def build_optimizer(loss, learning_rate, grad_clip):
    
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer

In [22]:
class CharRNN:
    
    def __init__(self, num_classes, batch_size=64, num_steps=50,
                lstm_size=128, num_layers=2, learning_rate=0.001,
                grad_clip=5, sampling=False):
        
        if sampling==True:
            batch_size, num_steps = 1,1
        else:
            batch_size, num_steps = batch_size, num_steps
        
        tf.reset_default_graph()
        
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)
        
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state = self.initial_state)
        self.final_state = state
        
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
        
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [23]:
batch_size = 100        
num_steps = 100         
lstm_size = 512         
num_layers = 2          
learning_rate = 0.001   
keep_prob = 0.5         

In [24]:
epochs = 20
# Save every N iterations
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

Epoch: 1/20...  Training Step: 1...  Training loss: 4.4177...  10.7653 sec/batch
Epoch: 1/20...  Training Step: 2...  Training loss: 4.3276...  7.1861 sec/batch
Epoch: 1/20...  Training Step: 3...  Training loss: 3.8290...  7.2061 sec/batch
Epoch: 1/20...  Training Step: 4...  Training loss: 5.2100...  7.1581 sec/batch
Epoch: 1/20...  Training Step: 5...  Training loss: 4.3128...  7.2051 sec/batch
Epoch: 1/20...  Training Step: 6...  Training loss: 3.8016...  8.0547 sec/batch
Epoch: 1/20...  Training Step: 7...  Training loss: 3.7036...  8.2479 sec/batch
Epoch: 1/20...  Training Step: 8...  Training loss: 3.5802...  8.1058 sec/batch
Epoch: 1/20...  Training Step: 9...  Training loss: 3.4868...  8.0893 sec/batch
Epoch: 1/20...  Training Step: 10...  Training loss: 3.4194...  8.0547 sec/batch
Epoch: 1/20...  Training Step: 11...  Training loss: 3.3607...  8.0727 sec/batch
Epoch: 1/20...  Training Step: 12...  Training loss: 3.3515...  8.0967 sec/batch
Epoch: 1/20...  Training Step: 13...

Epoch: 1/20...  Training Step: 103...  Training loss: 3.1055...  9.7479 sec/batch
Epoch: 1/20...  Training Step: 104...  Training loss: 3.1048...  9.6969 sec/batch
Epoch: 1/20...  Training Step: 105...  Training loss: 3.0966...  9.6259 sec/batch
Epoch: 1/20...  Training Step: 106...  Training loss: 3.0939...  9.6699 sec/batch
Epoch: 1/20...  Training Step: 107...  Training loss: 3.0741...  9.5648 sec/batch
Epoch: 1/20...  Training Step: 108...  Training loss: 3.0810...  9.6163 sec/batch
Epoch: 1/20...  Training Step: 109...  Training loss: 3.0856...  9.6969 sec/batch
Epoch: 1/20...  Training Step: 110...  Training loss: 3.0470...  10.2343 sec/batch
Epoch: 1/20...  Training Step: 111...  Training loss: 3.0680...  8.3850 sec/batch
Epoch: 1/20...  Training Step: 112...  Training loss: 3.0779...  8.4160 sec/batch
Epoch: 1/20...  Training Step: 113...  Training loss: 3.0609...  8.7042 sec/batch
Epoch: 1/20...  Training Step: 114...  Training loss: 3.0482...  8.2569 sec/batch
Epoch: 1/20... 

Epoch: 2/20...  Training Step: 203...  Training loss: 2.4911...  8.5561 sec/batch
Epoch: 2/20...  Training Step: 204...  Training loss: 2.4902...  8.0347 sec/batch
Epoch: 2/20...  Training Step: 205...  Training loss: 2.4926...  8.3099 sec/batch
Epoch: 2/20...  Training Step: 206...  Training loss: 2.4986...  8.2569 sec/batch
Epoch: 2/20...  Training Step: 207...  Training loss: 2.5038...  8.3870 sec/batch
Epoch: 2/20...  Training Step: 208...  Training loss: 2.4801...  10.4524 sec/batch
Epoch: 2/20...  Training Step: 209...  Training loss: 2.4616...  9.6444 sec/batch
Epoch: 2/20...  Training Step: 210...  Training loss: 2.4798...  10.5725 sec/batch
Epoch: 2/20...  Training Step: 211...  Training loss: 2.4721...  11.0464 sec/batch
Epoch: 2/20...  Training Step: 212...  Training loss: 2.4972...  10.7026 sec/batch
Epoch: 2/20...  Training Step: 213...  Training loss: 2.4749...  13.1053 sec/batch
Epoch: 2/20...  Training Step: 214...  Training loss: 2.4675...  10.0061 sec/batch
Epoch: 2/2

Epoch: 2/20...  Training Step: 303...  Training loss: 2.2452...  8.4060 sec/batch
Epoch: 2/20...  Training Step: 304...  Training loss: 2.2521...  8.2789 sec/batch
Epoch: 2/20...  Training Step: 305...  Training loss: 2.2499...  8.2023 sec/batch
Epoch: 2/20...  Training Step: 306...  Training loss: 2.2770...  8.2709 sec/batch
Epoch: 2/20...  Training Step: 307...  Training loss: 2.2755...  8.2028 sec/batch
Epoch: 2/20...  Training Step: 308...  Training loss: 2.2443...  8.2779 sec/batch
Epoch: 2/20...  Training Step: 309...  Training loss: 2.2532...  8.4600 sec/batch
Epoch: 2/20...  Training Step: 310...  Training loss: 2.2598...  8.3689 sec/batch
Epoch: 2/20...  Training Step: 311...  Training loss: 2.2390...  8.3099 sec/batch
Epoch: 2/20...  Training Step: 312...  Training loss: 2.2310...  8.2654 sec/batch
Epoch: 2/20...  Training Step: 313...  Training loss: 2.2343...  8.2208 sec/batch
Epoch: 2/20...  Training Step: 314...  Training loss: 2.2012...  8.5180 sec/batch
Epoch: 2/20...  

Epoch: 3/20...  Training Step: 403...  Training loss: 2.1061...  12.5429 sec/batch
Epoch: 3/20...  Training Step: 404...  Training loss: 2.1027...  9.4667 sec/batch
Epoch: 3/20...  Training Step: 405...  Training loss: 2.1291...  12.0446 sec/batch
Epoch: 3/20...  Training Step: 406...  Training loss: 2.1014...  10.0491 sec/batch
Epoch: 3/20...  Training Step: 407...  Training loss: 2.0851...  11.9735 sec/batch
Epoch: 3/20...  Training Step: 408...  Training loss: 2.0713...  9.9841 sec/batch
Epoch: 3/20...  Training Step: 409...  Training loss: 2.0994...  9.4737 sec/batch
Epoch: 3/20...  Training Step: 410...  Training loss: 2.1285...  12.3983 sec/batch
Epoch: 3/20...  Training Step: 411...  Training loss: 2.0788...  10.4074 sec/batch
Epoch: 3/20...  Training Step: 412...  Training loss: 2.0713...  9.5538 sec/batch
Epoch: 3/20...  Training Step: 413...  Training loss: 2.0845...  9.9756 sec/batch
Epoch: 3/20...  Training Step: 414...  Training loss: 2.1260...  10.1052 sec/batch
Epoch: 3/

Epoch: 3/20...  Training Step: 503...  Training loss: 1.9764...  8.4380 sec/batch
Epoch: 3/20...  Training Step: 504...  Training loss: 1.9915...  8.2529 sec/batch
Epoch: 3/20...  Training Step: 505...  Training loss: 1.9908...  8.2424 sec/batch
Epoch: 3/20...  Training Step: 506...  Training loss: 1.9866...  8.2979 sec/batch
Epoch: 3/20...  Training Step: 507...  Training loss: 1.9688...  8.1708 sec/batch
Epoch: 3/20...  Training Step: 508...  Training loss: 1.9632...  8.2008 sec/batch
Epoch: 3/20...  Training Step: 509...  Training loss: 1.9709...  8.3139 sec/batch
Epoch: 3/20...  Training Step: 510...  Training loss: 1.9668...  8.3459 sec/batch
Epoch: 3/20...  Training Step: 511...  Training loss: 1.9481...  8.1968 sec/batch
Epoch: 3/20...  Training Step: 512...  Training loss: 1.9315...  8.2239 sec/batch
Epoch: 3/20...  Training Step: 513...  Training loss: 1.9729...  8.2078 sec/batch
Epoch: 3/20...  Training Step: 514...  Training loss: 1.9596...  8.2318 sec/batch
Epoch: 3/20...  

Epoch: 4/20...  Training Step: 603...  Training loss: 1.9232...  9.8180 sec/batch
Epoch: 4/20...  Training Step: 604...  Training loss: 1.8932...  12.3903 sec/batch
Epoch: 4/20...  Training Step: 605...  Training loss: 1.8743...  9.9120 sec/batch
Epoch: 4/20...  Training Step: 606...  Training loss: 1.8727...  11.7634 sec/batch
Epoch: 4/20...  Training Step: 607...  Training loss: 1.8880...  10.0762 sec/batch
Epoch: 4/20...  Training Step: 608...  Training loss: 1.9206...  11.7273 sec/batch
Epoch: 4/20...  Training Step: 609...  Training loss: 1.8853...  9.6639 sec/batch
Epoch: 4/20...  Training Step: 610...  Training loss: 1.8661...  12.2517 sec/batch
Epoch: 4/20...  Training Step: 611...  Training loss: 1.8872...  11.5022 sec/batch
Epoch: 4/20...  Training Step: 612...  Training loss: 1.9172...  10.1112 sec/batch
Epoch: 4/20...  Training Step: 613...  Training loss: 1.8817...  12.4398 sec/batch
Epoch: 4/20...  Training Step: 614...  Training loss: 1.8925...  9.7940 sec/batch
Epoch: 4

Epoch: 4/20...  Training Step: 703...  Training loss: 1.8249...  9.6914 sec/batch
Epoch: 4/20...  Training Step: 704...  Training loss: 1.8206...  9.6458 sec/batch
Epoch: 4/20...  Training Step: 705...  Training loss: 1.8024...  9.4837 sec/batch
Epoch: 4/20...  Training Step: 706...  Training loss: 1.7943...  9.3887 sec/batch
Epoch: 4/20...  Training Step: 707...  Training loss: 1.8073...  9.5808 sec/batch
Epoch: 4/20...  Training Step: 708...  Training loss: 1.7994...  9.8270 sec/batch
Epoch: 4/20...  Training Step: 709...  Training loss: 1.7887...  9.6819 sec/batch
Epoch: 4/20...  Training Step: 710...  Training loss: 1.7755...  10.0617 sec/batch
Epoch: 4/20...  Training Step: 711...  Training loss: 1.8178...  9.0344 sec/batch
Epoch: 4/20...  Training Step: 712...  Training loss: 1.7921...  9.4027 sec/batch
Epoch: 4/20...  Training Step: 713...  Training loss: 1.8050...  9.8080 sec/batch
Epoch: 4/20...  Training Step: 714...  Training loss: 1.8032...  9.4377 sec/batch
Epoch: 4/20... 

Epoch: 5/20...  Training Step: 803...  Training loss: 1.7274...  10.8697 sec/batch
Epoch: 5/20...  Training Step: 804...  Training loss: 1.7239...  9.9642 sec/batch
Epoch: 5/20...  Training Step: 805...  Training loss: 1.7423...  8.8333 sec/batch
Epoch: 5/20...  Training Step: 806...  Training loss: 1.7830...  9.6188 sec/batch
Epoch: 5/20...  Training Step: 807...  Training loss: 1.7336...  9.5493 sec/batch
Epoch: 5/20...  Training Step: 808...  Training loss: 1.7234...  10.0381 sec/batch
Epoch: 5/20...  Training Step: 809...  Training loss: 1.7508...  9.4817 sec/batch
Epoch: 5/20...  Training Step: 810...  Training loss: 1.7885...  10.7496 sec/batch
Epoch: 5/20...  Training Step: 811...  Training loss: 1.7506...  10.0371 sec/batch
Epoch: 5/20...  Training Step: 812...  Training loss: 1.7622...  10.7746 sec/batch
Epoch: 5/20...  Training Step: 813...  Training loss: 1.7429...  10.3784 sec/batch
Epoch: 5/20...  Training Step: 814...  Training loss: 1.7825...  8.4900 sec/batch
Epoch: 5/2

Epoch: 5/20...  Training Step: 903...  Training loss: 1.6843...  9.5868 sec/batch
Epoch: 5/20...  Training Step: 904...  Training loss: 1.6865...  9.5908 sec/batch
Epoch: 5/20...  Training Step: 905...  Training loss: 1.6796...  10.6511 sec/batch
Epoch: 5/20...  Training Step: 906...  Training loss: 1.6818...  9.4627 sec/batch
Epoch: 5/20...  Training Step: 907...  Training loss: 1.6703...  8.7762 sec/batch
Epoch: 5/20...  Training Step: 908...  Training loss: 1.6444...  8.2469 sec/batch
Epoch: 5/20...  Training Step: 909...  Training loss: 1.6942...  8.4850 sec/batch
Epoch: 5/20...  Training Step: 910...  Training loss: 1.6789...  10.0131 sec/batch
Epoch: 5/20...  Training Step: 911...  Training loss: 1.6910...  9.8685 sec/batch
Epoch: 5/20...  Training Step: 912...  Training loss: 1.6868...  9.0324 sec/batch
Epoch: 5/20...  Training Step: 913...  Training loss: 1.6895...  9.0915 sec/batch
Epoch: 5/20...  Training Step: 914...  Training loss: 1.6488...  8.7742 sec/batch
Epoch: 5/20...

Epoch: 6/20...  Training Step: 1003...  Training loss: 1.6408...  9.6539 sec/batch
Epoch: 6/20...  Training Step: 1004...  Training loss: 1.6874...  11.1704 sec/batch
Epoch: 6/20...  Training Step: 1005...  Training loss: 1.6285...  10.2153 sec/batch
Epoch: 6/20...  Training Step: 1006...  Training loss: 1.6241...  10.8647 sec/batch
Epoch: 6/20...  Training Step: 1007...  Training loss: 1.6494...  9.2881 sec/batch
Epoch: 6/20...  Training Step: 1008...  Training loss: 1.6834...  9.2676 sec/batch
Epoch: 6/20...  Training Step: 1009...  Training loss: 1.6533...  9.8770 sec/batch
Epoch: 6/20...  Training Step: 1010...  Training loss: 1.6812...  9.9613 sec/batch
Epoch: 6/20...  Training Step: 1011...  Training loss: 1.6401...  10.6045 sec/batch
Epoch: 6/20...  Training Step: 1012...  Training loss: 1.6724...  9.2516 sec/batch
Epoch: 6/20...  Training Step: 1013...  Training loss: 1.6458...  8.9519 sec/batch
Epoch: 6/20...  Training Step: 1014...  Training loss: 1.6469...  8.3950 sec/batch


Epoch: 6/20...  Training Step: 1102...  Training loss: 1.6051...  9.6499 sec/batch


KeyboardInterrupt: 