In [2]:
# https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767
DEBUG = True

import re, random, math, csv, io, string, itertools, sys
import numpy as np
# import pandas as pd
import tensorflow as tf

In [3]:
def test_data_1(n_unrollings=100, echo_step=2, batch_size=5):
    x = np.array(np.random.choice(2, n_unrollings, p=[0.5, 0.5]))
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0

    x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((batch_size, -1))

    return (x, y)

In [12]:
class RNN:
    hp = dict(
        batch_sz=20,
        backprop_len=15,
        n_classes=2,
        state_sz=5,
        ckpt_path="./checkpoints/"
    )
    def __init__(self, **hyper_parameters):
        if hyper_parameters is not None:
            for k, v in hyper_parameters.items():
                self.hp[k] = v
    
    def _init_graph(self):
        tf.reset_default_graph()
        self.inputs_batch = tf.placeholder(tf.float32, [None, self.hp['backprop_len']], "inputs_batch")
        self.labels_batch = tf.placeholder(tf.int32, [None, self.hp['backprop_len']], "labels_batch")
        self.init_state = tf.placeholder(tf.float32,   [None, self.hp['state_sz']],     "init_state")
        
        self.w_tarns = tf.Variable(
            np.random.rand(
                self.hp['state_sz']+1, #prev_state + 1 for inputs
                self.hp['state_sz']
            ), 
            dtype=tf.float32
        )            
        self.b_trans = tf.Variable(np.zeros([1,self.hp['state_sz']]), dtype=tf.float32)
        self.w_out = tf.Variable(np.random.rand(self.hp["state_sz"], self.hp["n_classes"]), dtype=tf.float32)
        self.b_out = tf.Variable(np.zeros([1,self.hp['n_classes']]), dtype=tf.float32)
        
        self.inputs_seq = tf.unstack(self.inputs_batch, axis=1)
        self.labels_seq = tf.unstack(self.labels_batch, axis=1)

    def _forward(self):
        curr_state = self.init_state
        state_seq = []
        for curr_input in self.inputs_seq:
            curr_input = tf.reshape(curr_input, [-1, 1])
            comm = tf.concat([curr_input, curr_state],axis=1)
            next_state = tf.tanh(tf.matmul(comm, self.w_tarns) + self.b_trans)
            state_seq.append(next_state)
            curr_state = next_state
            
        self.curr_state = curr_state
        self.logits_seq = [tf.matmul(state,self.w_out)+self.b_out for state in state_seq]
        
        
    def _train_graph(self):
        self._init_graph()
        self._forward()
        
        losses = [
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits = logits,
                labels = labels
            ) 
            for logits, labels in zip(self.logits_seq, self.labels_seq)
        ]
        self.total_loss = tf.reduce_mean(losses)
        self.train_step = tf.train.AdagradOptimizer(0.3).minimize(self.total_loss)
        
    
    def _infer_graph(self):
        self._init_graph()
        self._forward()
        self.preds_seq = [tf.nn.softmax(logit) for logit in self.logits_seq]
        
    def predict(self,x):
        nobs = np.shape(x)[0]
        self._infer_graph()
        prediction=[]
        if nobs<self.hp["backprop_len"]:
            x = np.pad(x, (0, self.hp["backprop_len"] - nobs),mode="constant")
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            ckpt = tf.train.get_checkpoint_state("checkpoints")
            n_batches = 1 + int(nobs>self.hp['backprop_len'])*(nobs - self.hp['backprop_len'])
            start_idx = 0
            end_idx = self.hp["backprop_len"]
            curr_state = np.zeros([1, self.hp['state_sz']])
            is_first = True
            for batch_num in range(n_batches):
                x_batch = [x[start_idx:]]
                pred = sess.run(
                    [self.preds_seq],
                    feed_dict={
                        self.inputs_batch:x_batch,
                        self.init_state:curr_state
                    }
                )
                if is_first:
                    prediction.extend(pred)
                    is_first=False
                else:
                    prediction.append(pred[-1])
        return prediction[0][:nobs]
    
    def train(self, data_generator, n_epochs=10):
        self._train_graph()
        with tf.Session() as sess:
            train_writer = tf.summary.FileWriter( './mdl_tr', sess.graph)
            sess.run(tf.initialize_all_variables())
            losses=[]
            for epoch_num in range(n_epochs):
                x,y = data_generator()
                nobs = np.shape(x)[1]
                curr_state = np.zeros([np.shape(x)[0], self.hp['state_sz']])
                n_batches = nobs//self.hp['backprop_len']
                for batch_num in range(n_batches):
                    start_idx = batch_num * self.hp["backprop_len"]
                    end_idx = start_idx + min(nobs-start_idx, self.hp["backprop_len"])
                    
                    x_batch = x[:, start_idx:end_idx]
                    y_batch = y[:, start_idx:end_idx]
#                     merge = tf.summary.merge_all()
                    train_step, loss, curr_state = sess.run(
                        [self.train_step, self.total_loss, self.curr_state],
                        feed_dict={
                            self.inputs_batch:x_batch,
                            self.labels_batch:y_batch,
                            self.init_state:curr_state
                        }
                    )
                    
                print(f"loss={loss}")
#               train_writer.add_summary(summary, epoch_num)
                saver = tf.train.Saver()
                saver.save(sess, self.hp['ckpt_path'] + "nn.mdl", global_step=epoch_num)

In [13]:
rnn = RNN()

In [14]:
rnn.train(data_generator=lambda : test_data_1(n_unrollings=50000, batch_size=5),n_epochs=100)

loss=0.001625494216568768
loss=0.0006677026394754648
loss=0.000501706381328404
loss=0.00034393538953736424
loss=0.0003377927641849965
loss=0.00019592831085901707
loss=0.00018332937906961888
loss=0.0001781180762918666
loss=0.0001455811579944566
loss=0.00014301769260782748
loss=0.0001219673576997593
loss=0.00011415749759180471
loss=0.0001081654045265168
loss=9.112348925555125e-05
loss=9.098824375541881e-05
loss=8.634729601908475e-05
loss=7.364335033344105e-05
loss=8.19774650153704e-05
loss=7.231456402223557e-05
loss=7.440849731210619e-05
loss=7.339398871408775e-05
loss=6.562075577676296e-05
loss=5.4728756367694587e-05
loss=5.86020942137111e-05
loss=4.9290203605778515e-05
loss=5.5592743592569605e-05
loss=5.537425022339448e-05
loss=5.720061017200351e-05
loss=4.3438263674033806e-05
loss=3.724615817191079e-05
loss=3.439793363213539e-05
loss=4.102733510080725e-05
loss=4.2090468923561275e-05
loss=3.7538640754064545e-05
loss=3.6117704439675435e-05
loss=5.4464631830342114e-05
loss=3.745761569007

In [15]:
rnn.predict([0,1,0,1,0])

[array([[0.5, 0.5]], dtype=float32),
 array([[0.46303487, 0.53696513]], dtype=float32),
 array([[0.42363644, 0.57636356]], dtype=float32),
 array([[0.42664018, 0.57335985]], dtype=float32),
 array([[0.42498055, 0.5750195 ]], dtype=float32)]

In [233]:
test_data_1(n_unrollings=3000, batch_size=150)

(array([[0, 1, 1, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 1, 1, 1],
        ...,
        [0, 1, 1, ..., 0, 1, 1],
        [0, 1, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 1, 1, 0]]), array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 1, ..., 0, 0, 0],
        [1, 0, 0, ..., 1, 1, 1],
        ...,
        [0, 1, 0, ..., 1, 1, 0],
        [1, 1, 0, ..., 0, 1, 0],
        [0, 0, 1, ..., 1, 1, 1]]))

In [31]:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 2
echo_step = 2
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length

def generateData():
    x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0

    x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((batch_size, -1))

    return (x, y)


class RNN:
    def _train_graph(self):
        batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
        batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])

        init_state = tf.placeholder(tf.float32, [batch_size, state_size])

        W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
        b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)

        W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
        b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)

        # Unpack columns
        inputs_series = tf.unstack(batchX_placeholder, axis=1)
        labels_series = tf.unstack(batchY_placeholder, axis=1)

        # Forward pass
        current_state = init_state
        states_series = []
        for current_input in inputs_series:
            current_input = tf.reshape(current_input, [batch_size, 1])
            input_and_state_concatenated = tf.concat([current_input, current_state], axis=1)  # Increasing number of columns

            next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
            states_series.append(next_state)
            current_state = next_state
        self.current_state = current_state
        logits_series = [tf.matmul(state, W2) + b2 for state in states_series] #Broadcasted addition
        self.predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

        losses = [
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels
            ) for logits, labels in zip(logits_series,labels_series)
        ]
        self.total_loss = tf.reduce_mean(losses)

        self.train_step = tf.train.AdagradOptimizer(0.3).minimize(self.total_loss)

    def train(self):
        self._train_graph()
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            loss_list = []
            for epoch_idx in range(num_epochs):
                x,y = generateData()
                _current_state = np.zeros((batch_size, state_size))
                print("New data, epoch", epoch_idx)
                for batch_idx in range(num_batches):
                    start_idx = batch_idx * truncated_backprop_length
                    end_idx = start_idx + truncated_backprop_length

                    batchX = x[:,start_idx:end_idx]
                    batchY = y[:,start_idx:end_idx]

                    _total_loss, _train_step, _current_state, self_predictions_series = sess.run(
                        [self.total_loss, self.train_step, self.current_state, self.predictions_series],
                        feed_dict={
                            batchX_placeholder:batchX,
                            batchY_placeholder:batchY,
                            init_state:_current_state
                        })

                    loss_list.append(_total_loss)

                    if batch_idx%100 == 0:
                        print("Step",batch_idx, "Loss", _total_loss)
                        print("Step",batch_idx, "Loss", _total_loss)
                        print(f"pred={np.array(_predictions_series)[:,0,:]}")
                        print(f"x={np.array(batchX)[0,:]}")
                saver = tf.train.Saver()
                saver.save(sess, self.hp['ckpt_path'] + "nn.mdl", global_step=epoch_num)
                
    def _predict_graph(self):
        batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
        batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])

        init_state = tf.placeholder(tf.float32, [batch_size, state_size])

        W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
        b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)

        W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
        b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)

        # Unpack columns
        inputs_series = tf.unstack(batchX_placeholder, axis=1)
        labels_series = tf.unstack(batchY_placeholder, axis=1)

        # Forward pass
        current_state = init_state
        states_series = []
        for current_input in inputs_series:
            current_input = tf.reshape(current_input, [batch_size, 1])
            input_and_state_concatenated = tf.concat([current_input, current_state], axis=1)  # Increasing number of columns

            next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
            states_series.append(next_state)
            current_state = next_state

        logits_series = [tf.matmul(state, W2) + b2 for state in states_series] #Broadcasted addition
        predictions_series = [tf.nn.softmax(logits) for logits in logits_series]
    

In [32]:
rnn=RNN()
rnn.train()

New data, epoch 0


InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_22' with dtype int32 and shape [5,15]
	 [[Node: Placeholder_22 = Placeholder[dtype=DT_INT32, shape=[5,15], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'Placeholder_22', defined at:
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/roman/py3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/roman/py3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/roman/py3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 497, in start
    self.io_loop.start()
  File "/Users/roman/py3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/Users/roman/py3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/Users/roman/py3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/roman/py3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/Users/roman/py3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/roman/py3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/Users/roman/py3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/roman/py3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/roman/py3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/roman/py3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/roman/py3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/roman/py3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/roman/py3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/roman/py3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/roman/py3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2907, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/roman/py3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-32-694f9779f647>", line 2, in <module>
    rnn.train()
  File "<ipython-input-31-0bc9001f085f>", line 68, in train
    self._train_graph()
  File "<ipython-input-31-0bc9001f085f>", line 30, in _train_graph
    batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])
  File "/Users/roman/py3/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1735, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/Users/roman/py3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 4925, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/Users/roman/py3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/roman/py3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/Users/roman/py3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/Users/roman/py3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder_22' with dtype int32 and shape [5,15]
	 [[Node: Placeholder_22 = Placeholder[dtype=DT_INT32, shape=[5,15], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [None]:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 2
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length

def generateData():
    x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0

    x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((batch_size, -1))

    return (x, y)

batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])

init_state = tf.placeholder(tf.float32, [batch_size, state_size])

W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)

W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)

# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)

# Forward pass
current_state = init_state
states_series = []
for current_input in inputs_series:
    current_input = tf.reshape(current_input, [batch_size, 1])
    input_and_state_concatenated = tf.concat([current_input, current_state], axis=1)  # Increasing number of columns

    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
    states_series.append(next_state)
    current_state = next_state

logits_series = [tf.matmul(state, W2) + b2 for state in states_series] #Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

losses = [
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels
    ) for logits, labels in zip(logits_series,labels_series)
]
total_loss = tf.reduce_mean(losses)

train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)

def plot(loss_list, predictions_series, batchX, batchY):
    plt.subplot(2, 3, 1)
    plt.cla()
    plt.plot(loss_list)

    for batch_series_idx in range(5):
        one_hot_output_series = np.array(predictions_series)[:, batch_series_idx, :]
        single_output_series = np.array([(1 if out[0] < 0.5 else 0) for out in one_hot_output_series])
        print("single_output_series:", single_output_series)
        print("batchY[batch_series_idx, :]",batchY[batch_series_idx, :])
        print("")
        plt.subplot(2, 3, batch_series_idx + 2)
        plt.cla()
        plt.axis([0, truncated_backprop_length, 0, 2])
        left_offset = range(truncated_backprop_length)
        plt.bar(left_offset, batchX[batch_series_idx, :], width=1, color="blue")
        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1, color="red")
        plt.bar(left_offset, single_output_series * 0.3, width=1, color="green")

    plt.draw()
    plt.pause(0.0001)


with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    for epoch_idx in range(num_epochs):
        x,y = generateData()
        _current_state = np.zeros((batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:,start_idx:end_idx]
            batchY = y[:,start_idx:end_idx]

            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder:batchX,
                    batchY_placeholder:batchY,
                    init_state:_current_state
                })

            loss_list.append(_total_loss)

            if batch_idx%100 == 0:
                print("Step",batch_idx, "Loss", _total_loss)
                plot(loss_list, _predictions_series, batchX, batchY)

plt.ioff()
plt.show()