## Some basic tensorflow examples

In [1]:
%%javascript
// Replace the output area's scroll check with one that always answers "no",
// whatever line count it is asked about.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead of
// being put in a scrollable box - confirm against the notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}


<IPython.core.display.Javascript object>

In [2]:
# import and useful code to visualise graphs in a notebook:

import tensorflow as tf
from tensorflow import scan
import numpy as np
from IPython.display import clear_output, Image, display, HTML, IFrame


# following 2 copied from 
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
def strip_consts(graph_def, max_const_size=32):
    """Return a copy of ``graph_def`` with large constant payloads stubbed out.

    Every node of ``graph_def`` is copied into a fresh ``tf.GraphDef``. For
    nodes whose op is ``'Const'``, a ``tensor_content`` longer than
    ``max_const_size`` bytes is replaced by the placeholder text
    ``<stripped N bytes>`` (N being the original length).

    Parameters
    ----------
    graph_def:
        object exposing an iterable ``node`` field (a ``GraphDef``)
    max_const_size: int
        largest ``tensor_content`` length, in bytes, that is kept verbatim

    Returns
    -------
    tf.GraphDef
        the stripped copy; ``graph_def`` itself is not modified
    """
    strip_def = tf.GraphDef()
    for source_node in graph_def.node:
        node = strip_def.node.add()
        node.MergeFrom(source_node)
        if node.op != 'Const':
            continue
        tensor = node.attr['value'].tensor
        size = len(tensor.tensor_content)
        if size > max_const_size:
            # tensor_content is a protobuf *bytes* field: assigning a text string
            # raises TypeError on Python 3, so encode the stub explicitly.
            tensor.tensor_content = ("<stripped %d bytes>" % size).encode("utf-8")
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inline in the notebook.

    ``graph_def`` may be a GraphDef or any object offering ``as_graph_def()``.
    Constants larger than ``max_const_size`` bytes are stripped first, then the
    graph text is embedded in a ``<tf-graph-basic>`` element whose component is
    loaded from tensorboard.appspot.com, and the page is shown in an iframe.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    stripped = strip_consts(graph_def, max_const_size=max_const_size)

    # The text form of the graph is injected as a quoted literal into the page script.
    pbtxt_literal = repr(str(stripped))
    # A random suffix keeps element ids distinct when several graphs are shown.
    element_id = 'graph' + str(np.random.rand())

    page_template = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    page = page_template.format(data=pbtxt_literal, id=element_id)

    frame_template = """
        <iframe seamless style="width:800px;height:400px;border:0" srcdoc="{}"></iframe>
    """
    # The page sits inside the srcdoc attribute, so its double quotes must be escaped.
    escaped_page = page.replace('"', '&quot;')
    display(HTML(frame_template.format(escaped_page)))

## Graph definition and execution

In [3]:
# Essential in notebooks: without resetting the default graph, re-running this cell
# would make the variable declarations below clash with the ones from the previous run.
tf.reset_default_graph()

# get_variable() on its own checks whether a variable name has already been used.
# The difference with the Variable() constructor is that the constructor duplicates the
# variable if it already exists, calling it name, name_1, name_2, etc. under the hood.
# To have get_variable reuse an existing variable automatically:
# with tf.variable_scope("processing_block_1", reuse=tf.AUTO_REUSE):
#    t_1 = tf.get_variable("t_1", shape=(), initializer=tf.constant_initializer(1.0))
# Otherwise "tf.reset_default_graph()" (above) is needed in notebooks.
t_1 = tf.get_variable("t_1", shape=(), initializer=tf.constant_initializer(2.0))

# An init operation must be created and run with session.run(init) before any other run,
# otherwise no variables are initialised.
init = tf.global_variables_initializer()

vec_1 = tf.placeholder(dtype=tf.float32, name="vec_1", shape=(1,3))
res = tf.multiply(t_1, vec_1)
# res2 is only added to the graph; it is never evaluated in this cell.
res2 = tf.multiply(res, res)

with tf.Session() as sess:
    # The placeholder's tensor is named 'vec_1:0'; the same string is held in vec_1.name.
    # Intermediate tensors produced by operations are named operation_name:0,
    # operation_name:1, etc. for operations that produce multiple tensors.
    # Duplicated operations are named Mul, Mul_1, etc.
    # To print all tensors produced by the operations in the graph:
    # for each_op in sess.graph.get_operations():
    #     print(each_op.name, each_op.values())
    #     # or equivalently res.op.name etc. using <variable name>.op.name will also work
    sess.run(init)
    # The placeholder is fed by its tensor name rather than by the Python object.
    res_ = sess.run([res], feed_dict={'vec_1:0': np.array([[1.0, 2.0, 3.0]])})
    print(res_)

#     # specifying op:index_of_result also works:
#     print(sess.run(['Mul:0'], feed_dict={'vec_1:0': np.array([[1.0, 2.0, 3.0]])}))

   


[array([[2., 4., 6.]], dtype=float32)]


## Variable, Graph, and Session scopes

In [4]:
# Create a new graph, make it the default inside the `with` block, and add variables to it:
graph1 = tf.Graph()
with graph1.as_default():
    # Note that we don't need to reset the default graph here:
    # every time this cell runs, a brand new graph is created and used as default.
    t_1 = tf.get_variable("t_1", shape=(), initializer=tf.constant_initializer(-2.0))
    init_1 = tf.global_variables_initializer()

    vec_1 = tf.placeholder(dtype=tf.float32, name="vec_1", shape=(1,3))
    res_1 = tf.multiply(t_1, vec_1)

# Create another, independent graph:
graph2 = tf.Graph()
with graph2.as_default():
    t_2 = tf.get_variable("t_2", shape=(), initializer=tf.constant_initializer(2.0))
    init_2 = tf.global_variables_initializer()

    vec_2 = tf.placeholder(dtype=tf.float32, name="vec_2", shape=(1,3))
    res_2 = tf.multiply(t_2, vec_2)

# A session must be told which graph to run when it is not the default one:
with tf.Session(graph=graph1) as sess:
    sess.run(init_1)
    res_ = sess.run([res_1], feed_dict={vec_1.name: np.array([[1.0, 2.0, 3.0]])})
    print(res_)

with tf.Session(graph=graph2) as sess:
    sess.run(init_2)
    res_ = sess.run([res_2], feed_dict={vec_2.name: np.array([[1.0, 2.0, 3.0]])})
    print(res_)

# Note that both the Python variable names (t_1 / t_2, vec_1 / vec_2) and the "name"
# parameters differ between the two graphs.
# The "name" parameter could be the same, but the Python variable names have to be
# different, otherwise we lose the reference to the first object.
# NOTE(review): the original note states that reusing the same "name" parameter only
# works for distributed TF - unverified here. This notebook uses a different Python
# variable name + a different "name" parameter.


[array([[-2., -4., -6.]], dtype=float32)]
[array([[2., 4., 6.]], dtype=float32)]


## Some handy tensor manipulations

In [5]:
tf.reset_default_graph()

# A tensor's shape lists the outermost axis first, just like numpy: shape = (3, 4)
# means 3 outer elements, each holding 4 inner elements.
start_values = np.array([[1, 2, 3, 4],[5, 6, 7, 8],[11, 12, 13, 14]],
                        dtype=np.float32)

with tf.Session() as sess:
    start_ = tf.placeholder(dtype=tf.float32, name='start_', shape=(3,4))
    # Evaluating the placeholder itself simply echoes the array that was fed in.
    print(sess.run(start_, feed_dict={'start_:0': start_values}))

[[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]
 [11. 12. 13. 14.]]


In [6]:
tf.reset_default_graph()

# 3 x 5 x 6 test array. Each value reads <row><column>.<block>,
# e.g. 34.2 is row 3, column 4 of block 2.
base_array = np.array(
    [
        [
            [11.1, 12.1, 13.1, 14.1, 15.1, 16.1],
            [21.1, 22.1, 23.1, 24.1, 25.1, 26.1],
            [31.1, 32.1, 33.1, 34.1, 35.1, 36.1],
            [41.1, 42.1, 43.1, 44.1, 45.1, 46.1],
            [51.1, 52.1, 53.1, 54.1, 55.1, 56.1],
        ],
        [
            [11.2, 12.2, 13.2, 14.2, 15.2, 16.2],
            [21.2, 22.2, 23.2, 24.2, 25.2, 26.2],
            [31.2, 32.2, 33.2, 34.2, 35.2, 36.2],
            [41.2, 42.2, 43.2, 44.2, 45.2, 46.2],
            [51.2, 52.2, 53.2, 54.2, 55.2, 56.2],
        ],
        [
            [11.3, 12.3, 13.3, 14.3, 15.3, 16.3],
            [21.3, 22.3, 23.3, 24.3, 25.3, 26.3],
            [31.3, 32.3, 33.3, 34.3, 35.3, 36.3],
            [41.3, 42.3, 43.3, 44.3, 45.3, 46.3],
            [51.3, 52.3, 53.3, 54.3, 55.3, 56.3],
        ],
    ],
    dtype=np.float32,
)
base_3d = tf.placeholder(dtype=tf.float32, name='base_3d', shape=(3,5,6))

# Slicing:
# tf.slice(input_, [start_index_0, start_index_1, ...], [size_0, size_1, ...]);
# a size of -1 means "until the end of that axis".
slice_1 = tf.slice(base_3d, [0, 2, 3], [-1, -1, -1])
slice_2 = tf.slice(base_3d, [1, 0, 3], [-1, 3, -1])

# tf.squeeze drops every axis of size 1.
removed_dim1 = tf.squeeze(tf.slice(base_3d, [1, 0, 3], [1, 3, -1]))

# reshape keeps the flattened element order and only regroups it: the last axis is
# filled first (groups of 3), 6 such groups form a block, and there are 5 blocks.
# Same as np.ndarray.reshape - it is a different "view" of the same sequence.
reshaped_1 = tf.reshape(base_3d, [5, 6, 3])

square_1 = tf.squeeze(tf.slice(base_3d, [0, 0, 0], [1, 3, 3]))
square_2 = tf.squeeze(tf.slice(base_3d, [0, 2, 3], [1, -1, -1]))

base_3d_3by3 = tf.slice(base_3d, [0, 0, 0], [3, 3, 3])
transpose_1 = tf.transpose(base_3d_3by3, [0, 2, 1])
transpose_2 = tf.transpose(base_3d_3by3, [1, 0, 2])

# Joining along the first (outermost) axis, then along the second axis.
axis_0_stack = tf.concat([square_1, square_2], axis=0)
axis_1_stack = tf.concat([square_1, square_2], axis=1)

# "unstack" is handy for feeding data into a model:
unstack_0 = tf.unstack(base_3d, axis = 0)

# Every demo is evaluated with the same feed; pair each printed label with its tensor.
feed = {base_3d.name: base_array}
demos = [
    ('\n--tf.slice(base_3d, [0, 2, 3], [-1, -1, -1])', slice_1),
    ('\n--tf.slice(base_3d, [1, 0, 3], [-1, 3, -1])', slice_2),
    ('\n--tf.squeeze(tf.slice(base_3d, [1, 0, 3], [1, 3, -1]))', removed_dim1),
    ('\n--tf.reshape(base_3d, [5, 6, 3])', reshaped_1),
    ('\n--tf.transpose(base_3d_3by3, [0, 2, 1])', transpose_1),
    ('\n--tf.transpose(base_3d_3by3, [1, 0, 2])', transpose_2),
    ('\n--tf.concat([square_1, square_2], axis=0)', axis_0_stack),
    ('\n--tf.concat([square_1, square_2], axis=1)', axis_1_stack),
]

with tf.Session() as sess:
    for label, tensor in demos:
        print(label)
        print(sess.run([tensor], feed_dict=feed))

    print('\n--tf.unstack(base_3d, axis = 0)')
    unstacked = sess.run([unstack_0], feed_dict=feed)
    # len(unstacked[0]) gives the number of pieces base_3d was split into.
    print(unstacked)
        
        



--tf.slice(base_3d, [0, 2, 3], [-1, -1, -1])
[array([[[34.1, 35.1, 36.1],
        [44.1, 45.1, 46.1],
        [54.1, 55.1, 56.1]],

       [[34.2, 35.2, 36.2],
        [44.2, 45.2, 46.2],
        [54.2, 55.2, 56.2]],

       [[34.3, 35.3, 36.3],
        [44.3, 45.3, 46.3],
        [54.3, 55.3, 56.3]]], dtype=float32)]

--tf.slice(base_3d, [1, 0, 3], [-1, 3, -1])
[array([[[14.2, 15.2, 16.2],
        [24.2, 25.2, 26.2],
        [34.2, 35.2, 36.2]],

       [[14.3, 15.3, 16.3],
        [24.3, 25.3, 26.3],
        [34.3, 35.3, 36.3]]], dtype=float32)]

--tf.squeeze(tf.slice(base_3d, [1, 0, 3], [1, 3, -1]))
[array([[14.2, 15.2, 16.2],
       [24.2, 25.2, 26.2],
       [34.2, 35.2, 36.2]], dtype=float32)]

--tf.reshape(base_3d, [5, 6, 3])
[array([[[11.1, 12.1, 13.1],
        [14.1, 15.1, 16.1],
        [21.1, 22.1, 23.1],
        [24.1, 25.1, 26.1],
        [31.1, 32.1, 33.1],
        [34.1, 35.1, 36.1]],

       [[41.1, 42.1, 43.1],
        [44.1, 45.1, 46.1],
        [51.1, 52.1, 53.1],
 

In [7]:
# This is a transpose that is particularly useful for feeding mini batches to RNNs.

tf.reset_default_graph()

# Example mini batch. The network accepts 3 input time series; each 3 x T array is one
# data "point". In total 3 data "points" go into this mini batch, each with T = 6 steps.

mb = np.array(
    [
        [  # data "point" 1
            [11.1, 11.2, 11.3, 11.4, 11.5, 11.6],
            [21.1, 21.2, 21.3, 21.4, 21.5, 21.6],
            [31.1, 31.2, 31.3, 31.4, 31.5, 31.6]
        ],
        [  # data "point" 2
            [12.1, 12.2, 12.3, 12.4, 12.5, 12.6],
            [22.1, 22.2, 22.3, 22.4, 22.5, 22.6],
            [32.1, 32.2, 32.3, 32.4, 32.5, 32.6]
        ],
        [  # data "point" 3
            [13.1, 13.2, 13.3, 13.4, 13.5, 13.6],
            [23.1, 23.2, 23.3, 23.4, 23.5, 23.6],
            [33.1, 33.2, 33.3, 33.4, 33.5, 33.6]
        ]
    ],
    dtype=np.float32
)

mb_tf = tf.placeholder(dtype=tf.float32, name="mb_tf", shape=(None, 3, None))

# (data point, input, time) -> (time, data point, input): time becomes the outermost axis.
mb_time_major = tf.transpose(mb_tf, [2, 0, 1])
# [1, 2, 0] is the inverse permutation of [2, 0, 1], so this really undoes the transform
# above (the identity permutation [0, 1, 2] would merely echo the input).
mb_round_trip = tf.transpose(mb_time_major, [1, 2, 0])

with tf.Session() as sess:
    mb_tf_eval = sess.run([mb_time_major], feed_dict={mb_tf.name: mb})

    mb_tf_eval_original = sess.run([mb_round_trip], feed_dict={mb_tf.name: mb})

    print('mini batch transformed:\n {}'.format(mb_tf_eval))
    print('then transformed back:\n {}'.format(mb_tf_eval_original))

# The round trip must reproduce the mini batch exactly.
np.testing.assert_array_equal(mb_tf_eval_original[0], mb)
          

# t = 0
# [array([[[11.1, 21.1, 31.1],  # data "point" 1, all 3 inputs 
#         [12.1, 22.1, 32.1],   # data "point" 2, all 3 inputs
#         [13.1, 23.1, 33.1]],  # data "point" 3, all 3 inputs
# t = 1
#        [[11.2, 21.2, 31.2],   # data "point" 1, all 3 inputs
#         [12.2, 22.2, 32.2],   # data "point" 2, all 3 inputs
#         [13.2, 23.2, 33.2]],  # data "point" 3, all 3 inputs
# t = 2
#        [[11.3, 21.3, 31.3],   # .... 
#         [12.3, 22.3, 32.3],
#         [13.3, 23.3, 33.3]],

#        [[11.4, 21.4, 31.4],
#         [12.4, 22.4, 32.4],
#         [13.4, 23.4, 33.4]],

#        [[11.5, 21.5, 31.5],
#         [12.5, 22.5, 32.5],
#         [13.5, 23.5, 33.5]],

#        [[11.6, 21.6, 31.6],
#         [12.6, 22.6, 32.6],
#         [13.6, 23.6, 33.6]]], dtype=float32)]

# so that when this is fed into scan(), it receives the data for t = 0, 1, 2, ... in turn.
# At each time step, we multiply the weights matrix (3 inputs x #neurons) with all
# data "points" in parallel, i.e.
# [[11.t, 21.t, 31.t],        [[--#neurons--],              [[--#neurons--],  # output from data "point" 1
#  [12.t, 22.t, 32.t],         [--#neurons--],            =  [--#neurons--],   # output from data "point" 2
#  [13.t, 23.t, 33.t]] matmul  [--#neurons--]] (weights)     [--#neurons--]]   # output from data "point" 3


mini batch transformed:
 [array([[[11.1, 21.1, 31.1],
        [12.1, 22.1, 32.1],
        [13.1, 23.1, 33.1]],

       [[11.2, 21.2, 31.2],
        [12.2, 22.2, 32.2],
        [13.2, 23.2, 33.2]],

       [[11.3, 21.3, 31.3],
        [12.3, 22.3, 32.3],
        [13.3, 23.3, 33.3]],

       [[11.4, 21.4, 31.4],
        [12.4, 22.4, 32.4],
        [13.4, 23.4, 33.4]],

       [[11.5, 21.5, 31.5],
        [12.5, 22.5, 32.5],
        [13.5, 23.5, 33.5]],

       [[11.6, 21.6, 31.6],
        [12.6, 22.6, 32.6],
        [13.6, 23.6, 33.6]]], dtype=float32)]
then transformed back:
 [array([[[11.1, 11.2, 11.3, 11.4, 11.5, 11.6],
        [21.1, 21.2, 21.3, 21.4, 21.5, 21.6],
        [31.1, 31.2, 31.3, 31.4, 31.5, 31.6]],

       [[12.1, 12.2, 12.3, 12.4, 12.5, 12.6],
        [22.1, 22.2, 22.3, 22.4, 22.5, 22.6],
        [32.1, 32.2, 32.3, 32.4, 32.5, 32.6]],

       [[13.1, 13.2, 13.3, 13.4, 13.5, 13.6],
        [23.1, 23.2, 23.3, 23.4, 23.5, 23.6],
        [33.1, 33.2, 33.3, 33.4, 33.5, 33.6]]

## A "DIY" RNN

In [8]:
tf.reset_default_graph()


class MyRNN:
    """
    A "DIY" one layer recurrent neural network.

    All inputs are connected to all neurons, and the previous output of the
    layer is fed back into every neuron through a linear map:

        S(t) = X(t) . W_frd + Y(t-1) . W_bk
        Y(t) = tanh(S(t) . W_out)

    X(t) and Y(t) hold one row per data "point" and nof_inputs columns,
    S(t) holds the states of all neurons (nof_neurons columns) and Y(-1) is
    all zeros. Note that the feedback term uses the previous *output* Y(t-1),
    which is why W_bk has shape (nof_inputs, nof_neurons).

    Graph pieces (forward pass, training op, single-step op) are built on
    first use and cached, so repeated calls do not keep adding nodes.
    """

    def __init__(self, nof_neurons, nof_inputs, session=None, learning_rate=0.01):
        """
        num. outputs = num. inputs
        num. internal hidden states = num. neurons

        nof_neurons: int
        nof_inputs: int
        session: tf.Session
            can supply this so we share session with other graph runs;
            when omitted a new session is created (see close())
        learning_rate: float
            step size of the gradient descent optimiser used by train()
        """
        # a hack to have multiple instances of MyRNN, not sharing variables and
        # living under different name spaces:
        self.name = MyRNN.get_instance_name()
        self.nof_neurons = nof_neurons
        self.nof_inputs = nof_inputs

        self.optimiser = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

        # every time the constructor is called new weight variables are instantiated
        with tf.variable_scope('{}/weights'.format(self.name), reuse=False):
            self._W_frd = tf.get_variable('_W_frd', dtype=tf.float32, shape=(nof_inputs, nof_neurons),
                                          initializer=tf.random_uniform_initializer(maxval=1, minval=-1),
                                          trainable=True)
            self._W_bk = tf.get_variable('_W_bk', dtype=tf.float32, shape=(nof_inputs, nof_neurons),
                                         initializer=tf.random_uniform_initializer(maxval=1, minval=-1),
                                         trainable=True)
            self._W_out = tf.get_variable('_W_out', dtype=tf.float32, shape=(nof_neurons, nof_inputs),
                                          initializer=tf.random_uniform_initializer(maxval=1, minval=-1),
                                          trainable=True)

        # network input/target; the number of data "points" and of time steps is
        # determined by the shape of whatever is fed in:
        self.network_input = tf.placeholder(dtype=tf.float32, name="network_input",
                                            shape=(None, nof_inputs, None))
        self.network_output = tf.placeholder(dtype=tf.float32, name="network_output",
                                             shape=(None, nof_inputs, None))

        # only a session created here is ours to close
        self._owns_session = session is None
        self.session = tf.Session() if session is None else session

        # graph pieces built lazily and then reused
        self._forward_op = None
        self._training_op = None
        self._step_ops = None

        # initialise this instance's variables only
        self.session.run([self._W_frd.initializer,
                          self._W_bk.initializer,
                          self._W_out.initializer])

    @staticmethod
    def get_instance_name():
        """
        Generates a name for a new MyRNN instance on the default graph.

        Looks at the top-level scope of every operation whose name starts with
        'MyRNN' and returns 'MyRNN_<k>' with k one above the largest numeric
        suffix found, or 'MyRNN_0' when there is none.
        """
        indices = []
        for op in tf.get_default_graph().get_operations():
            top_scope = op.name.split('/')[0]
            if not top_scope.startswith('MyRNN'):
                continue
            suffix = top_scope.split('_')[-1]
            # ignore scopes without a numeric suffix instead of failing on int()
            if suffix.isdigit():
                indices.append(int(suffix))
        if indices:
            return 'MyRNN_{}'.format(max(indices) + 1)
        return 'MyRNN_0'

    @property
    def W_out(self):
        """Current value of the output weights, evaluated in the session."""
        return self.session.run(self._W_out)

    @property
    def W_frd(self):
        """Current value of the forward (input) weights, evaluated in the session."""
        return self.session.run(self._W_frd)

    @property
    def W_bk(self):
        """Current value of the feedback weights, evaluated in the session."""
        return self.session.run(self._W_bk)

    def close(self):
        """Closes the session if (and only if) this instance created it."""
        if self._owns_session:
            self.session.close()

    def one_step(self, input_, prev_output=None):
        """
        Evaluates one time step of the network.

        input_: array-like, D x nof_inputs
            input at time t, one row per data "point"
        prev_output: array-like, D x nof_inputs, optional
            network output at time t - 1; zeros when omitted

        Returns
        -------
        np.ndarray, D x nof_inputs
        """
        input_ = np.asarray(input_, dtype=np.float32)
        if prev_output is None:
            prev_output = np.zeros_like(input_)

        if self._step_ops is None:
            # _one_step works on 2d tensors, so it needs its own 2d placeholders
            with tf.variable_scope('{}/one_step'.format(self.name)):
                step_input = tf.placeholder(dtype=tf.float32, name='step_input',
                                            shape=(None, self.nof_inputs))
                step_prev = tf.placeholder(dtype=tf.float32, name='step_prev_output',
                                           shape=(None, self.nof_inputs))
                step_output = self._one_step(step_prev, step_input)
            self._step_ops = (step_input, step_prev, step_output)

        step_input, step_prev, step_output = self._step_ops
        return self.session.run(step_output,
                                feed_dict={step_input: input_, step_prev: prev_output})

    def compute_outputs(self, input_):
        """
        Runs the forward pass over a whole mini batch.

        input_: array-like, D x nof_inputs x M (see _forward for the layout)

        Returns
        -------
        np.ndarray with the same shape as input_
        """
        return self.session.run(self._get_forward_op(),
                                feed_dict={self.network_input: input_})

    def _get_forward_op(self):
        """Forward pass on self.network_input, built once and then reused."""
        if self._forward_op is None:
            self._forward_op = self._forward(self.network_input)
        return self._forward_op

    def _forward(self, X):
        """
        Builds the forward pass for X. Adds new nodes to the graph on every call.

        X: tensor
            with dim D x N x M
            i.e. D input data "points", N being number of inputs
            and M being the "time index" - multivariate time series
            grouped into mini batches. Each batch having D data points.

        Returns
        -------
        tensor
            network outputs, D x N x M
        """
        with tf.variable_scope('{}/forward'.format(self.name)):
            # time goes first so that scan walks over the time steps
            input_for_scan = tf.transpose(X, [2, 0, 1])
            # scan is called without an initializer, so its first element acts as the
            # initial state: prepend an all-zero time step for that purpose
            init_state = tf.zeros_like(tf.slice(input_for_scan, [0, 0, 0], [1, -1, -1]))
            forward_batch_output = scan(self._one_step,
                                        tf.concat([init_state, input_for_scan], axis=0))
            # discard the first result (the zero state) and restore the original layout
            outputs = tf.slice(forward_batch_output, [1, 0, 0], [-1, -1, -1])
            return tf.transpose(outputs, [1, 2, 0])

    def _one_step(self, prev_output, X):
        """
        prev_output: 2d tensor, network output at time t - 1 (same layout as X)
        X: 2d tensor, representing input at time t
            [
              [--#inputs--], # data "point" 1
              [--#inputs--], # data "point" 2
              [--#inputs--],...
            ]

        Returns
        -------
        2d tensor, network output at time t (same layout as X)
        """
        state = tf.matmul(X, self._W_frd) + tf.matmul(prev_output, self._W_bk)
        return tf.tanh(tf.matmul(state, self._W_out))

    def train(self, X, Y):
        """
        Performs one gradient descent step (back prop).
        Cost function is the mean square error between network output and Y.

        X: array-like
            mini batch input - see docstring of _forward() for expected shape
        Y: array-like
            mini batch example output for training - same shape as X
        """
        if self._training_op is None:
            network_out = self._get_forward_op()
            cost = tf.reduce_mean(tf.square(network_out - self.network_output))
            # only this instance's weights are trained, whatever else is on the graph
            self._training_op = self.optimiser.minimize(
                cost, var_list=[self._W_frd, self._W_bk, self._W_out])
        self.session.run(self._training_op,
                         feed_dict={self.network_input: X,
                                    self.network_output: Y})
        
# Two networks on the same graph; each one gets its own generated instance name.
diy_rnn = MyRNN(nof_neurons=5, nof_inputs=3)
diy_rnn2 = MyRNN(nof_neurons=6, nof_inputs=4)

# Test mini batch: 4 data points, 3 input time series per data point, 6 time steps.
test_mb = np.array(
    [
        [
            [0.111, 0.121, 0.131, 0.141, 0.151, 0.161],
            [0.211, 0.221, 0.231, 0.241, 0.251, 0.261],
            [0.311, 0.321, 0.331, 0.341, 0.351, 0.361],
        ],
        [
            [0.112, 0.122, 0.132, 0.142, 0.152, 0.162],
            [0.212, 0.222, 0.232, 0.242, 0.252, 0.262],
            [0.312, 0.322, 0.332, 0.342, 0.352, 0.362],
        ],
        [
            [0.113, 0.123, 0.133, 0.143, 0.153, 0.163],
            [0.213, 0.223, 0.233, 0.243, 0.253, 0.263],
            [0.313, 0.323, 0.333, 0.343, 0.353, 0.363],
        ],
        [
            [0.114, 0.124, 0.134, 0.144, 0.154, 0.164],
            [0.214, 0.224, 0.234, 0.244, 0.254, 0.264],
            [0.314, 0.324, 0.334, 0.344, 0.354, 0.364],
        ],
    ],
    dtype=np.float32,
)

forward_outputs = diy_rnn.compute_outputs(test_mb)
print('example forward outputs:\n{}'.format(forward_outputs))

# Weights before the training step ...
for report, weights in (('Wout before training:\n{}', diy_rnn.W_out),
                        ('Wfrd before training:\n{}', diy_rnn.W_frd),
                        ('Wbk before training:\n{}', diy_rnn.W_bk)):
    print(report.format(weights))

# ... one training step, with the input itself used as the target ...
diy_rnn.train(test_mb, test_mb)

# ... and the weights afterwards.
for report, weights in (('Wout after training:\n{}', diy_rnn.W_out),
                        ('Wfrd after training:\n{}', diy_rnn.W_frd),
                        ('Wbk after training:\n{}', diy_rnn.W_bk)):
    print(report.format(weights))

# Alternative: save the graph with
#   tf.summary.FileWriter("logs", tf.get_default_graph()).close()
# and start tensorboard to view it. There seemed to be issues with
# tensorboard 1.10.0 / tf-nightly (1.10.0.dev20180625), worked around with:
#   from tensorboard import main
#   ... main.run_main()

# Have a look at the graph inline - run this in Chrome for it to work.
show_graph(tf.get_default_graph().as_graph_def())



example forward outputs:
[[[-0.43713775 -0.5196746  -0.5963596  -0.68890154 -0.7367517
   -0.76427716]
  [-0.31076908 -0.56929684 -0.5769629  -0.65653247 -0.70673895
   -0.7324885 ]
  [-0.08484887  0.1796611   0.46338734  0.6080182   0.6995294
    0.7454171 ]]

 [[-0.4383958  -0.52091837 -0.5976772  -0.69006836 -0.7377465
   -0.765126  ]
  [-0.31211182 -0.5707486  -0.5784257  -0.657848   -0.70784944
   -0.73346263]
  [-0.08519451  0.18023323  0.4643329   0.6089428   0.70020485
    0.7458622 ]]

 [[-0.43965217 -0.52215964 -0.59899104 -0.6912303  -0.7387365
   -0.7659709 ]
  [-0.31345338 -0.57219595 -0.5798844  -0.65915823 -0.7089546
   -0.7344322 ]
  [-0.08554003  0.18080395  0.46527416  0.6098619   0.700875
    0.74630326]]

 [[-0.4409068  -0.5233982  -0.6003011  -0.6923874  -0.73972183
   -0.76681185]
  [-0.31479365 -0.57363886 -0.58133864 -0.66046315 -0.7100548
   -0.73539746]
  [-0.08588555  0.18137339  0.46621126  0.61077535  0.7015399
    0.7467404 ]]]
Wout before training:
[[-0.5