In [1]:
# 1. magic for inline plot
# 2. magic to print version
# 3. magic so that the notebook will reload external python modules
# 4. magic to enable retina (high resolution) plots
# https://gist.github.com/minrk/3301035
%matplotlib inline
%load_ext watermark
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# change default style figure and font size
plt.rcParams.update({'figure.figsize': (8, 6), 'font.size': 12})

%watermark -a 'Ethen' -d -t -v -p numpy,pandas,sklearn,matplotlib

Ethen 2018-04-21 14:02:21 

CPython 3.6.4
IPython 6.2.1

numpy 1.14.2
pandas 0.22.0
sklearn 0.19.1
matplotlib 2.2.2


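The intuition behind an RNN is that each element in the sequence is processed in turn, and the hidden state carried over from the previous element is combined with the current input, so the network's output at any step can depend on everything it has seen so far.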

In [2]:
import warnings

# Ignore both warning categories before TensorFlow is imported
for ignored_category in (FutureWarning, DeprecationWarning):
    warnings.simplefilter(action='ignore', category=ignored_category)

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Import MNIST data (labels are returned one-hot encoded)
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Instructions for updating:
Use the retry module or similar alternatives.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [3]:
def variable_summaries(var):
    """Register TensorBoard summaries describing the tensor `var`.

    Adds scalar summaries for the mean, standard deviation, maximum and
    minimum of `var`, plus a histogram of its values. Everything is created
    under a 'summaries' name scope. Returns nothing.
    """
    with tf.name_scope('summaries'):
        var_mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', var_mean)

        # Standard deviation = sqrt of the mean squared deviation from the mean
        with tf.name_scope('stddev'):
            squared_deviation = tf.square(var - var_mean)
            var_stddev = tf.sqrt(tf.reduce_mean(squared_deviation))

        tf.summary.scalar('stddev', var_stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

In [4]:
# Model / data dimensions
element_size = 28        # length of one sequence element
time_steps = 28          # number of elements per sequence
num_classes = 10
batch_size = 128
hidden_layer_size = 128

# Directory that receives the TensorBoard model summaries
LOG_DIR = "logs/RNN_with_summaries"

# Graph inputs: a batch of sequences and the matching label vectors.
# The leading (batch) dimension is left unspecified.
_inputs = tf.placeholder(
    tf.float32,
    shape=[None, time_steps, element_size],
    name='inputs',
)
y = tf.placeholder(
    tf.float32,
    shape=[None, num_classes],
    name='labels',
)

In [5]:
# Draw one mini-batch and view every flat image as `time_steps` rows of
# `element_size` pixels (28 sequences of 28 pixels)
batch_x, batch_y = mnist.train.next_batch(batch_size)
batch_x = np.reshape(batch_x, (batch_size, time_steps, element_size))
batch_x.shape

(128, 28, 28)

In [6]:
# Trainable parameters of the RNN cell. All three start at zero and each one
# gets TensorBoard summaries attached through `variable_summaries`.
with tf.name_scope('rnn_weights'):
    # Weights applied to the current input element,
    # shape (element_size, hidden_layer_size)
    with tf.name_scope("W_x"):
        Wx = tf.Variable(tf.zeros([element_size, hidden_layer_size]))
        variable_summaries(Wx)
    # Weights applied to the previous hidden state,
    # shape (hidden_layer_size, hidden_layer_size)
    with tf.name_scope("W_h"):
        Wh = tf.Variable(tf.zeros([hidden_layer_size, hidden_layer_size]))
        variable_summaries(Wh)
    # Bias vector of length hidden_layer_size
    with tf.name_scope("Bias"):
        b_rnn = tf.Variable(tf.zeros([hidden_layer_size])) 
        variable_summaries(b_rnn)

In [7]:
def rnn_step(previous_hidden_state, x):
    """Advance the RNN by one time step.

    Computes tanh(previous_hidden_state @ Wh + x @ Wx + b_rnn) and returns it
    as the new hidden state. The argument order (accumulated state first,
    current element second) is the one `tf.scan` calls its function with.
    """
    recurrent_term = tf.matmul(previous_hidden_state, Wh)
    input_term = tf.matmul(x, Wx)
    return tf.tanh(recurrent_term + input_term + b_rnn)

In [8]:
# Processing inputs to work with scan function
# Current input shape: (batch_size, time_steps, element_size)
processed_input = tf.transpose(_inputs, perm=[1, 0, 2])
# Current input shape now: (time_steps, batch_size, element_size)

# Size the initial state from the batch that is actually fed instead of the
# `batch_size` constant: the `_inputs` placeholder leaves its batch dimension
# open, and a fixed-size initial state would make the graph fail (or silently
# broadcast for a batch of one) whenever a different number of examples is fed.
dynamic_batch_size = tf.shape(_inputs)[0]
initial_hidden = tf.zeros(tf.stack([dynamic_batch_size, hidden_layer_size]))
# Keep the statically known width so downstream shape inference still works
initial_hidden.set_shape([None, hidden_layer_size])

# Getting all state vectors across time: scan applies `rnn_step` along the
# leading (time) axis, carrying the hidden state from one step to the next
all_hidden_states = tf.scan(rnn_step,
                            processed_input,
                            initializer=initial_hidden,
                            name='states')

In [9]:
# Trainable parameters of the linear layer that maps a hidden state to one
# score per class. Both are drawn from a truncated normal with mean 0 and
# standard deviation 0.01, and both get TensorBoard summaries attached.
with tf.name_scope('linear_layer_weights') as scope:
    # Weight matrix, shape (hidden_layer_size, num_classes)
    with tf.name_scope("W_linear"):
        Wl = tf.Variable(tf.truncated_normal([hidden_layer_size,
                                              num_classes],
                                              mean=0,stddev=.01))
        variable_summaries(Wl)
    # Bias vector of length num_classes
    with tf.name_scope("Bias_linear"):
        bl = tf.Variable(tf.truncated_normal([num_classes],
                                             mean=0,stddev=.01))
        variable_summaries(bl)

# Apply linear layer to state vector
def get_linear_layer(hidden_state):
    """Return `hidden_state @ Wl + bl`, the linear layer's output for `hidden_state`."""
    projected = tf.matmul(hidden_state, Wl)
    return projected + bl

# Use a scope name of its own: re-entering 'linear_layer_weights' would make
# TensorFlow open a second, uniquified scope ('linear_layer_weights_1'), which
# shows up in TensorBoard as a confusingly named duplicate of the weights node.
with tf.name_scope('linear_layer_output'):
    # Only the hidden state of the last time step is turned into class scores.
    # (To score every time step instead, map `get_linear_layer` over
    # `all_hidden_states` with `tf.map_fn` and pick the entries you need.)
    output = get_linear_layer(all_hidden_states[-1])
    tf.summary.histogram('outputs', output)

In [10]:
# Loss: softmax cross-entropy between the logits and the labels, averaged
# over the batch
with tf.name_scope('cross_entropy'):
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=output, labels=y)
    cross_entropy = tf.reduce_mean(per_example_loss)
    tf.summary.scalar('cross_entropy', cross_entropy)

# Optimisation step (RMSProp, learning rate 0.001, decay 0.9)
with tf.name_scope('train'):
    optimizer = tf.train.RMSPropOptimizer(0.001, 0.9)
    train_step = optimizer.minimize(cross_entropy)

# Accuracy, expressed as a percentage of the batch
with tf.name_scope('accuracy'):
    true_class = tf.argmax(y, 1)
    predicted_class = tf.argmax(output, 1)
    correct_prediction = tf.equal(true_class, predicted_class)

    fraction_correct = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    accuracy = fraction_correct * 100
    tf.summary.scalar('accuracy', accuracy)

# Merge all the summaries
merged = tf.summary.merge_all()

In [11]:
from tqdm import trange

# Get a small test set
test_data = mnist.test.images[:batch_size].reshape((-1, time_steps,
                                                     element_size))
test_label = mnist.test.labels[:batch_size]

num_iterations = 3000   # number of mini-batch updates
display_every = 1000    # how often to print the mini-batch loss / accuracy

with tf.Session() as sess:
    # Write summaries to LOG_DIR -- used by TensorBoard.
    # Both writers record the graph on creation.
    train_writer = tf.summary.FileWriter(LOG_DIR + '/train',
                                         graph=tf.get_default_graph())
    test_writer = tf.summary.FileWriter(LOG_DIR + '/test',
                                        graph=tf.get_default_graph())
    try:
        sess.run(tf.global_variables_initializer())

        for i in trange(num_iterations):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Reshape data to get 28 sequences of 28 pixels
            batch_x = batch_x.reshape((batch_size, time_steps, element_size))
            train_feed = {_inputs: batch_x, y: batch_y}

            # Summaries are not logged during training. To record them, fetch
            # `merged` together with `train_step` and hand the result to
            # `train_writer.add_summary(summary, i)`.
            sess.run(train_step, feed_dict=train_feed)

            if i % display_every == 0:
                # Re-evaluate on the batch that was just trained on
                acc, loss = sess.run([accuracy, cross_entropy],
                                     feed_dict=train_feed)
                print("Iter {}, Minibatch Loss= {:.6f}, "
                      "Training Accuracy= {:.5f}".format(i, loss, acc))

        test_acc = sess.run(accuracy, feed_dict={_inputs: test_data,
                                                 y: test_label})
        print("Test Accuracy:", test_acc)
    finally:
        # Flush pending events and release the event files even if training
        # is interrupted or raises
        train_writer.close()
        test_writer.close()


  0%|          | 11/3000 [00:00<01:04, 46.17it/s]

Iter 0, Minibatch Loss= 2.302947, Training Accuracy= 10.93750


 34%|███▍      | 1017/3000 [00:11<00:22, 88.30it/s]

Iter 1000, Minibatch Loss= 1.250852, Training Accuracy= 52.34375


 67%|██████▋   | 2018/3000 [00:23<00:11, 87.64it/s]

Iter 2000, Minibatch Loss= 0.469878, Training Accuracy= 81.25000


100%|██████████| 3000/3000 [00:34<00:00, 86.03it/s]

Test Accuracy: 96.09375





In [12]:
# Intentionally left without code: this cell used to hold a stray `hi`
# expression that raised NameError and stopped "Run All" at this point.

In [None]:
from subprocess import Popen

# Start TensorBoard on the summaries written under LOG_DIR.
# The argument list is passed directly (no shell), so the path needs no
# quoting and `p` refers to the TensorBoard process itself rather than to an
# intermediate shell.
p = Popen(['tensorboard', '--logdir={}'.format(LOG_DIR)])

# Stop the TensorBoard process. NOTE: executing this cell in one go starts the
# server and stops it right away; run this line on its own once you have
# finished browsing.
p.kill()

Reference: http://suriyadeepan.github.io/2017-02-13-unfolding-rnn-2/

In [None]:
# data
PAULG_PATH = 'paulg/'
PAULG_FILENAME = PAULG_PATH + 'paulg.txt'

# Read the whole corpus, then break it on newline characters
with open(PAULG_FILENAME) as corpus_file:
    corpus_text = corpus_file.read()
lines = corpus_text.split('\n')

lines[0]

In [None]:
# character level vocabulary
raw_data = '\n'.join(lines)
# Sorted so the index assigned to each character is the same on every run
# (iteration order of a set of strings changes between interpreter sessions)
idx2char = sorted(set(raw_data))
char2idx = {char: idx for idx, char in enumerate(idx2char)}

seq_len = 20
num_chars = len(raw_data)
# Every row of Y is the matching row of X shifted one character ahead, so the
# last row needs one character beyond its own window; reserving that character
# keeps the final row complete when num_chars is a multiple of seq_len.
data_len = max((num_chars - 1) // seq_len, 0)

# Encode the text once, then cut inputs and targets out of it as two windows
# offset by one character
encoded = np.fromiter((char2idx[char] for char in raw_data),
                      dtype=np.int32, count=num_chars)
X = encoded[:data_len * seq_len].reshape(data_len, seq_len).copy()
Y = encoded[1:data_len * seq_len + 1].reshape(data_len, seq_len).copy()

X

In [None]:
import joblib

# Persist the index-encoded inputs and targets inside the corpus directory
for array_filename, array in (('idx_x.npy', X), ('idx_y.npy', Y)):
    np.save(PAULG_PATH + array_filename, array)

# Store both lookup tables so indices can be mapped back to characters later
vocab_metadata = {'idx2char': idx2char, 'char2idx': char2idx}
with open(PAULG_PATH + 'metadata.pkl', 'wb') as metadata_file:
    joblib.dump(vocab_metadata, metadata_file)