# Tensorboard example


Example based on a small RNN school project.


In [20]:
import time
from collections import namedtuple
import numpy as np
import tensorflow as tf

In [21]:
with open('anna.txt', 'r') as f:
    text=f.read()
vocab = set(text)
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

In [22]:
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [23]:
encoded[:100]

array([39, 65, 55,  5, 60, 37, 34, 21, 15, 29, 29, 29, 72, 55,  5,  5, 40,
       21, 59, 55, 13, 16, 51, 16, 37, 54, 21, 55, 34, 37, 21, 55, 51, 51,
       21, 55, 51, 16, 74, 37, 28, 21, 37, 11, 37, 34, 40, 21, 69, 24, 65,
       55,  5,  5, 40, 21, 59, 55, 13, 16, 51, 40, 21, 16, 54, 21, 69, 24,
       65, 55,  5,  5, 40, 21, 16, 24, 21, 16, 60, 54, 21, 19, 33, 24, 29,
       33, 55, 40, 12, 29, 29,  6, 11, 37, 34, 40, 60, 65, 16, 24])

In [24]:
len(vocab)

83

In [25]:
def get_batches(arr, n_seqs, n_steps_per_seq):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    # Get the batch size and number of batches we can make
    # ie n_seq = 10, n_steps_per_sew = 2, batch_size = 20
    
    batch_size = n_seqs * n_steps_per_seq
    
    # ie arr= 40, over 20, so 2 batches
    n_batches = len(arr) // batch_size
    
    # Keep only enough characters to make full batches
    # n_batches = 2 * batch_size = 20 = 40??
    # why not simply use len(arr)?
    
    arr = arr[ : n_batches * batch_size]
    
    # Reshape into n_seqs rows
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0, arr.shape[1], n_steps_per_seq):
        # The features
        x = arr[ :, n: n + n_steps_per_seq]
        # The targets, shifted by one
        y = np.zeros_like(x)
        y[ :, : -1], y[ : , -1] = x[ :, 1: ], x[ :, 0]
        yield x, y

In [26]:
batches = get_batches(encoded, 10, 50)
x, y = next(batches)

In [27]:
def build_inputs(batch_size, num_steps):
    ''' Define placeholders for inputs, targets, and dropout 
    
        Arguments
        ---------
        batch_size: Batch size, number of sequences per batch
        num_steps: Number of sequence steps in a batch
        
    '''
    with tf.name_scope('inputs'):
        # Declare placeholders we'll feed into the graph
        inputs = tf.placeholder(tf.int32, (batch_size, num_steps), name="inputs")
        targets = tf.placeholder(tf.int32, (batch_size, num_steps), name="targets")

        # Keep probability placeholder for drop out layers
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    return inputs, targets, keep_prob

In [28]:
def single_lstm_cell(lstm_size, keep_prob):
    
    lstm = tf.contrib.rnn.NASCell(lstm_size, reuse = tf.get_variable_scope().reuse)
    
    # Add dropout to the cell outputs
    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob = keep_prob)
    
    return drop

In [29]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build LSTM cell.
    
        Arguments
        ---------
        keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability
        lstm_size: Size of the hidden layers in the LSTM cells
        num_layers: Number of LSTM layers
        batch_size: Batch size

    '''
    ### Build the LSTM Cell
    
    # Stack up multiple LSTM layers, for deep learning
    
    rnn_cells = tf.contrib.rnn.MultiRNNCell([single_lstm_cell(lstm_size, keep_prob) for _ in range(num_layers)], 
                                               state_is_tuple = True)
        
    with tf.name_scope("RNN_init_state"):
        initial_state = rnn_cells.zero_state(batch_size, tf.float32)
    
    return rnn_cells, initial_state

In [30]:
def build_output(lstm_output, in_size, out_size):
    ''' Build a softmax layer, return the softmax output and logits.
    
        Arguments
        ---------
        lstm_output: List of output tensors from the LSTM layer
        in_size: Size of the input tensor, for example, size of the LSTM cells
        out_size: Size of this softmax layer
    
    '''

    # Reshape output so it's a bunch of rows, one row for each step for each sequence.
    
    # Concatenate lstm_output over axis 1 (the columns)
    # ie t1 = t1 = [[1, 2, 3], [4, 5, 6]]
    # t2 = [[7, 8, 9], [10, 11, 12]]
    # tf.concat([t1, t2], 1) ==> [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]]
    seq_output = tf.concat(lstm_output, axis=1)
    
    # Reshape seq_output to a 2D tensor with lstm_size columns
    x = tf.reshape(lstm_output, [-1, in_size])
    
    # Connect the RNN outputs to a softmax layer
    with tf.variable_scope('softmax'):
        # Create the weight and bias variables here
        softmax_w = tf.Variable(tf.truncated_normal( (in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros( out_size ))
        
        # tensorboard
        tf.summary.histogram("softmax_w", softmax_w)
    
    # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
    # of rows of logit outputs, one for each step and sequence
    logits = tf.matmul(x,  softmax_w) + softmax_b
    
    # Use softmax to get the probabilities for predicted characters
    out = tf.nn.softmax(logits, name="predictions")
    tf.summary.histogram("predictions", out)
    
    return out, logits

In [31]:
def build_loss(logits, targets, lstm_size, num_classes):
    ''' Calculate the loss from the logits and the targets.
    
        Arguments
        ---------
        logits: Logits from final fully connected layer
        targets: Targets for supervised learning
        lstm_size: Number of LSTM hidden units
        num_classes: Number of classes in targets
        
    '''
    
    # One-hot encode targets and reshape to match logits, one row per sequence per step
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped =  tf.reshape( y_one_hot, logits.get_shape() )
    
    # Softmax cross entropy loss
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    
    # tensorboard
    tf.summary.scalar('loss', loss)
    
    return loss

In [32]:
def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build optmizer for training, using gradient clipping.
    
        Arguments:
        loss: Network loss
        learning_rate: Learning rate for optimizer
    
    '''
    
    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer

In [33]:
class CharRNN:
    
    def __init__(self, num_classes, batch_size=64, num_steps=50, 
                       lstm_size=128, num_layers=2, learning_rate=0.001, 
                       grad_clip=5, sampling=False):
    
        # When we're using this network for sampling later, we'll be passing in
        # one character at a time, so providing an option for that
        if sampling == True:
            batch_size, num_steps = 1, 1
        else:
            batch_size, num_steps = batch_size, num_steps

        tf.reset_default_graph()
        
        # Build the input placeholder tensors
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)
        x_one_hot = tf.one_hot(self.inputs, num_classes, name="x_one_hot")

        with tf.name_scope("RNN_layers"):
            # Build the LSTM cell
            cells, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)
    

        ### Run the data through the RNN layers
        with tf.name_scope("RNN_forward"):
        # Run each sequence step through the RNN with tf.nn.dynamic_rnn 
            outputs, state = tf.nn.dynamic_rnn(cells, x_one_hot, initial_state=self.initial_state)
        
        
        self.final_state = state
        
        # Get softmax predictions and logits
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
        
        # Loss and optimizer (with gradient clipping)
        self.loss =  build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [34]:
batch_size = 64         # Sequences per batch
num_steps = 128          # Number of sequence steps per batch
lstm_size = 512         # Size of hidden layers in LSTMs
num_layers = 2          # Number of LSTM layers
learning_rate = 0.001    # Learning rate
keep_prob = 0.5         # Dropout keep probability

In [35]:
model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

In [None]:
epochs = 30

# Save every N iterations
save_every_n = 200

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Tensoboard
    train_writer = tf.summary.FileWriter('./logs/1/train', sess.graph)
    # test_writer = tf.summary.FileWriter('./logs/1/test')
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            
            merged = tf.summary.merge_all() # Tensorboard
            
            summary, batch_loss, new_state, _ = sess.run([merged, model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            
            train_writer.add_summary(summary, counter)
            
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

In [None]:
tf.train.get_checkpoint_state('checkpoints')

In [None]:
def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [None]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab))
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

Here, pass in the path to a checkpoint and sample from the network.

In [None]:
tf.train.latest_checkpoint('checkpoints')

In [None]:
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="Far")
print(samp)

In [None]:
checkpoint = 'checkpoints/i200_l512.ckpt'
samp = sample(checkpoint, 1000, lstm_size, len(vocab), prime="Far")
print(samp)

In [None]:
checkpoint = 'checkpoints/i600_l512.ckpt'
samp = sample(checkpoint, 1000, lstm_size, len(vocab), prime="Far")
print(samp)

In [None]:
checkpoint = 'checkpoints/i1200_l512.ckpt'
samp = sample(checkpoint, 1000, lstm_size, len(vocab), prime="Far")
print(samp)