code for creating a Kernel based relu-RNN learning for sequential MNIST
adapted from : Roth, Christopher, Ingmar Kanitscheider, and Ila Fiete. 2018. “Kernel RNN Learning (KeRNL),” September. https://openreview.net/forum?id=ryGfnoC5KQ.

In [54]:
import numpy as np 
import tensorflow as tf
import matplotlib.pyplot as plt 
from datetime import datetime
from pathlib import Path
import collections
import hashlib
import numbers
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
import os

from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.layers import base as base_layer
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import variables as tf_variables
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest
from tensorflow.contrib.rnn.python.ops.core_rnn_cell import _Linear
from matplotlib.colors import LinearSegmentedColormap

In [55]:
# uplading mnist data 

old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
train_data = mnist.train.images  # Returns np.array
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_data = mnist.test.images  # Returns np.array
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [66]:
# Training Parameters
learning_rate = 1e-5
training_steps = 5000
batch_size = 128
display_step = 200
test_len=128
grad_clip=200
# Network Parameters
num_input = 1 # MNIST data input (img shape: 28*28)
timesteps = 28*28 # timesteps
num_hidden = 128 # hidden layer num of features
num_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input


In [67]:
def RNN(x, weights, biases):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    
    # using variable scope to initialize to identity 
    with tf.variable_scope('recurrent',initializer=tf.initializers.identity()) as scope: 
        # Define a lstm cell with tensorflow
        lstm_cell = tf.contrib.rnn.BasicRNNCell(num_hidden,name='irnn')
        # Get lstm cell output
        outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[:,-1,:], weights['out']) + biases['out']

In [68]:
tf.reset_default_graph()
graph=tf.Graph()
with graph.as_default():
    # Define weights
    weights = {
        'out': tf.Variable(tf.random_normal([num_hidden, num_classes]),name='output_weight')
    }
    biases = {
        'out': tf.Variable(tf.random_normal([num_classes]),name='output_bias')
    }
    X = tf.placeholder("float", [None, timesteps, num_input])
    Y = tf.placeholder("float", [None, num_classes])
    logits = RNN(X, weights, biases)
    prediction = tf.nn.softmax(logits)
    variable_names=[v.name for v in tf.trainable_variables()]
    # Define loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=logits, labels=Y))
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    # compute gradients 
    grads_and_vars=optimizer.compute_gradients(loss_op)
    # clip the gradient based on norm clipping:  g^ <-- threshold/l2_norm(g^)*g^
    cropped_grads_and_vars=[(tf.clip_by_norm(grad, 2.0),var) if  np.unicode_.find(var.name,'output')==-1 else (grad,var) for grad,var in grads_and_vars]
    train_op = optimizer.minimize(loss_op)
    # Evaluate model (with test logits, for dropout to be disabled)
    correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()
        # predictions 
        #prediction=tf.nn.softmax(logits)
    tf.summary.histogram('prediction',prediction+1e-8)
    tf.summary.histogram('logits',logits+1e-8)
    tf.summary.scalar('loss',loss_op)
    merged_summary_op=tf.summary.merge_all()
    # define loss 
        #loss_op=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,labels=Y))
        # optimization loop 
        #tf.summary.scalar('loss',loss_op)
        #tf.summary.histogram('logits',logits)    
        #optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate)
        #gradients=optimizer.compute_gradients(loss_op)
        #capped_gvs = [(tf.clip_by_norm(grad, 1.), var) if not var.name.startswith("dense") else (grad, var) for grad, var in gradients]
        #for _, var in gradients:
        #    if var.name.startswith("dense"):
        #        print(var.name)   
        #train_op=optimizer.apply_gradients(capped_gvs)
        # initialize variables 
    merged_summary_op=tf.summary.merge_all()
        
        #saver=tf.train.Saver()
    

In [69]:
# verify initialization 
with tf.Session(graph=graph) as sess : 
    sess.run(init)
    values = sess.run(variable_names)
    for k, v in zip(variable_names,values):
        print(["variable: " , k])
        print(["value: " , v])
        print(["variable: " , np.unicode_.find(k,'output')]) 
        print(["shape: " , v.shape])
        #print(v) 
     

['variable: ', 'output_weight:0']
['value: ', array([[-0.57610005, -0.86781466,  0.06146901, ...,  0.612929  ,
        -0.5240201 , -1.372101  ],
       [-0.16586345,  0.10811837,  0.1457429 , ...,  0.57598233,
        -0.01288651,  1.6567582 ],
       [-0.30377197,  2.4883525 ,  0.54930466, ...,  1.2439618 ,
         1.3481966 ,  0.16031669],
       ...,
       [-0.6923631 , -0.28866172, -1.3959543 , ..., -0.05419074,
        -1.1114011 ,  0.20392828],
       [-1.4248531 ,  0.58069444,  1.3152072 , ..., -1.3075926 ,
        -0.5103756 , -1.1841886 ],
       [ 1.8997777 , -0.8674232 ,  0.62497854, ..., -0.8217903 ,
         1.4718441 , -1.832837  ]], dtype=float32)]
['variable: ', 0]
['shape: ', (128, 10)]
['variable: ', 'output_bias:0']
['value: ', array([-1.0550154 ,  0.07599307,  0.59524196, -1.3103149 ,  1.4098233 ,
        0.10235035, -0.8149388 , -2.5393212 , -0.90383595,  0.2311637 ],
      dtype=float32)]
['variable: ', 0]
['shape: ', (10,)]
['variable: ', 'recurrent/rnn/irnn/k

In [72]:
log_dir = "logs/irnn/bptt_gc_%d_eta_%d_batch_%d_run_%s" %(grad_clip,learning_rate,batch_size, datetime.now().strftime("%Y%m%d_%H%M"))
Path(log_dir).mkdir(exist_ok=True, parents=True)
filelist = [ f for f in os.listdir(log_dir) if f.endswith(".local") ]
for f in filelist:
    os.remove(os.path.join(log_dir, f))

In [73]:

# write graph into tensorboard 
tb_writer = tf.summary.FileWriter(log_dir,graph)
# run a training session 
with tf.Session(graph=graph) as sess:
    sess.run(init)
    for step in range(1,50):#range(1,training_steps+1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x=batch_x.reshape((batch_size,timesteps,num_input))
        # run optimizaer 
        sess.run(train_op,feed_dict={X:batch_x, Y:batch_y})
        loss_train, acc_train= sess.run([loss_op, accuracy],feed_dict={X:batch_x, Y:batch_y})
        merged_summary=sess.run(merged_summary_op,feed_dict={X:batch_x, Y:batch_y})
        tb_writer.add_summary(merged_summary, global_step=step)
        #tb_writer.flush()
        # show interim performance 
        if step % display_step==0 or step==1 : 
            # get batch loss and accuracy 
            print('Step: {}, Train Loss: {:.3f}, Train Acc: {:.3f}'.format(
            step + 1, loss_train, acc_train))
            # write summary 
            #tb_writer.add_summary(acc,global_step=step)
            #tb_writer.flush()
            # evaluate performance on test data 
            test_X=mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
            test_Y=mnist.test.labels[:test_len]

    print("Optimization Finished!")
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))

Step: 2, Train Loss: 2.859, Train Acc: 0.070
Optimization Finished!
Testing Accuracy: 0.0703125


'logs/irnn/irnn/bptt_gc_100_eta_0_run_20190121_1220'

get the name of trainable variables in the graph