In [12]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [13]:
def gen_data(size=1000000):
    X = np.array(np.random.choice(2, size=(size,)))
    y = []
    for i in range(size):
        threshold = .5
        if X[i-3] ==1:
            threshold += .5
        if X[i-8] ==1:
            threshold -= .25
        if np.random.rand() > threshold:
            y.append(0)
        else:
            y.append(1)
    return X, np.array(y)

In [4]:
def gen_batch(raw_data, batch_size, num_steps):
    raw_X, raw_y = raw_data
    data_length = len(raw_X)
    
    # partition raw data into batches and stack them vertically in a data matrix    
    batch_partition_length = data_length // batch_size
    data_X = np.zeros([batch_size, batch_partition_length], dtype=np.uint8)
    data_y = np.zeros([batch_size, batch_partition_length], dtype=np.uint8)
    
    for i in range(batch_size):
        data_X[i] = raw_X[batch_partition_length*i:batch_partition_length*(i + 1)]
        data_y[i] = raw_y[batch_partition_length*i:batch_partition_length*(i + 1)]
    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        X = data_X[:, num_steps * i : num_steps * (i+1)]
        y = data_y[:, num_steps * i : num_steps * (i+1)]
        yield (X,y)
    

In [5]:
def gen_epochs(n, num_steps):
    for i in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)

In [73]:
# Global config variables
num_steps = 5 # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1

In [31]:
X_, y_ = gen_data()

In [9]:
"""
Placeholders
"""

X = tf.placeholder(tf.int32, shape=[batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, shape=[batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])

"""
RNN Inputs
"""

# Convert X placeholder into a list of one-hot tensors
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes]
# X_one_hot = tf.one_hot(X, num_classes)
# rnn_inputs = tf.unstack(X_one_hot, axis=1)
rnn_inputs = tf.one_hot(X, num_classes)

"""
RNN
"""

cell = tf.contrib.rnn.BasicRNNCell(state_size)
rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

"Predictions, Loss, Training Step"
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=rnn_outputs))
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
train = optimizer.minimize(cross_entropy)

ValueError: Dimension 0 in both shapes must be equal, but are 1000 and 200 for 'SoftmaxCrossEntropyWithLogits' (op: 'SoftmaxCrossEntropyWithLogits') with input shapes: [1000,4], [200,5].

In [None]:
"""
Definition of Rnn_Cell
This is very similar to the __call__ method on Tensorflow's BasicRNNCell. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py#L95
"""

with tf.variable_scope('rnn_cell'):
    W = tf.get_variable('W', [num_classes + state_size, state_size])
    b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))

def rnn_cell(rnn_input, state):
    with tf.variable_scope('rnn_cell', reuse=True):
        W = tf.get_variable('W', [num_classes + state_size, state_size])
        b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
    return tf.tanh(tf.matmul(tf.concat([rnn_input, state],1), W) + b)

In [None]:
"""
Adding RNN_Cells to graph

This is a simplified version of the "static_rnn" function from Tensorflow's api. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn.py#L41
Note: In practice, using "dynamic_rnn" is a better choice that the "static_rnn":
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py#L390
"""
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
    state = rnn_cell(rnn_input, state)
    rnn_outputs.append(state)
final_state = rnn_outputs[-1]

In [None]:
"""
Predictions, loss, training step

Losses is similar to the "sequence_loss"
function from Tensorflow's API, except that here we are using a list of 2D tensors, instead of a 3D tensor. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/seq2seq/python/ops/loss.py#L30
"""

# logits and predictions
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]


# turn our y placeholder into a list of labels
y_as_list = tf.unstack(y, num=num_steps, axis=1)

# losses and train step
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logit) for \
         logit, label in zip(logits, y_as_list)]
total_loss =tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)

In [None]:
"""
Train the network
"""
def train_network(num_epocs, num_steps, state_size=4, verbose=True):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epocs, state_size)):
            training_loss = 0
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X,y) in enumerate(epoch):
                tr_losses, training_loss_,training_state, _ = \
                sess.run([losses,
                            total_loss,
                            final_state,
                            train_step], 
                            feed_dict={X:X, y:y, init_state:training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step>0:
                    weighted_loss = training_loss/100
                    if verbose:
                        print("Average loss at step", step,
                              "for last 250 steps:", weighted_loss)
                    training_losses.append(weighted_loss)
                    training_loss = 0
    return training_losses
                

In [None]:
training_losses = train_network(1, num_steps)