In [None]:
# python libraries
import numpy as np 
import matplotlib.pyplot as plt 
import collections
import hashlib
import numbers
import matplotlib.cm as cm
from sys import getsizeof
from datetime import datetime
from pathlib import Path
import os

from IPython.display import HTML
import re

# tensorflow and its dependencies 
import tensorflow as tf
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.layers import base as base_layer
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import variables as tf_variables
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest
from tensorflow.contrib.rnn.python.ops.core_rnn_cell import _Linear
from tensorflow.contrib import slim

## user defined modules 
# kernel rnn cell 
import keRNL_cell_v2 

In [None]:
# Load the MNIST data set with TensorFlow logging temporarily reduced to errors only.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)
try:
    # Kept next to its use (rather than in the import cell) so that the import itself
    # also runs under the reduced verbosity -- presumably to hide deprecation warnings.
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train_data = mnist.train.images  # Returns np.array
    train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
    eval_data = mnist.test.images  # Returns np.array
    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)
finally:
    # Restore the caller's verbosity even if reading the data set fails.
    tf.logging.set_verbosity(old_v)

# Training Parameters
weight_learning_rate = 1e-6
tensor_learning_rate = 1e-8 # learning rate for sensitivity tensor and temporal filter tensor 
training_steps = 5000
batch_size = 20
display_step = 200  # print progress every `display_step` training steps
test_len=128
grad_clip=100  # norm threshold handed to tf.clip_by_norm
# Network Parameters
num_input = 1 # one value is fed per timestep (inputs are reshaped to [batch, timesteps, num_input])
timesteps = 28*28 # timesteps: one per pixel of a 28*28 MNIST image
num_hidden = 100 # hidden layer num of features
num_classes = 10 # MNIST total classes (0-9 digits)

# report batch number 
total_batch = int(mnist.train.num_examples / batch_size)
print("Total number of batches:", total_batch)

# Noise Parameters
perturbation_std=1e-10  # passed to the KeRNL cell as `noise_std`

# Log directory under the user's home; the name encodes the hyper-parameters and a timestamp.
# Path.home() is used instead of os.environ['HOME'] so a missing HOME variable does not raise KeyError.
log_dir = str(Path.home()) + "/MyData/KeRNL/logs/kernl_rnn_MNIST/MNIST_eta_tensor_%1.0e_eta_weight_%1.0e_batch_%1.0e_hum_hidd_%1.0e_gc_%1.0e_steps_%1.0e_run_%s" % (
    tensor_learning_rate,
    weight_learning_rate,
    batch_size,
    num_hidden,
    grad_clip,
    training_steps,
    datetime.now().strftime("%Y%m%d_%H%M"),
)
log_dir

In [None]:
## define KeRNL unit
def kernl_rnn(x,kernel_weights,kernel_bias):
    """Unroll a KeRNL cell over ``x`` and apply a linear read-out to the last output.

    The cell is created inside the 'kernl' variable scope and configured from the
    notebook-level settings ``num_hidden``, ``num_input``, ``timesteps`` and
    ``perturbation_std``.

    Args:
        x: batch-major input tensor (``time_major=False``) given to ``tf.nn.dynamic_rnn``.
        kernel_weights: matrix that multiplies the output of the last timestep.
        kernel_bias: term added to the result of that product.

    Returns:
        A ``(read_out, final_state)`` pair: the linear read-out of the last
        timestep and the state object returned by ``tf.nn.dynamic_rnn``.
    """
    with tf.variable_scope('kernl'):
        # the cell relies on its own default initializers, except for the sensitivity tensor
        cell = keRNL_cell_v2.KeRNLCell_v2(
            num_units=num_hidden,
            num_inputs=num_input,
            time_steps=timesteps,
            noise_std=perturbation_std,
            sensitivity_initializer=tf.initializers.identity,
        )
        # unroll the cell along the time axis
        all_outputs, final_state = tf.nn.dynamic_rnn(
            cell, inputs=x, dtype=tf.float32, time_major=False
        )
        # only the output of the final timestep feeds the read-out layer
        last_output = all_outputs[:,-1,:]
        read_out = tf.matmul(last_output, kernel_weights) + kernel_bias

    return read_out, final_state

In [None]:
tf.reset_default_graph()
graph=tf.Graph()


def find_joing_index(x, name_1, name_2):
    """Return the index of the first element of ``x`` whose name contains both substrings.

    Args:
        x: sequence of objects exposing a string ``name`` attribute
            (here: the list of trainable variables).
        name_1: first substring that must occur in the name.
        name_2: second substring that must occur in the name.

    Returns:
        Position in ``x`` of the first element matching both substrings.

    Raises:
        ValueError: if no element matches both substrings.
    """
    # Plain substring tests replace np.unicode_.find, an alias removed in NumPy 2.0.
    for index, variable in enumerate(x):
        if name_1 in variable.name and name_2 in variable.name:
            return index
    raise ValueError(
        "no variable name contains both %r and %r; candidates: %s"
        % (name_1, name_2, [variable.name for variable in x]))


def _clip_non_output_gradients(grads_and_vars, clip_norm):
    """Clip every gradient by norm, except those whose variable name contains 'output'."""
    return [(grad, var) if 'output' in var.name else (tf.clip_by_norm(grad, clip_norm), var)
            for grad, var in grads_and_vars]


with graph.as_default():
    # read-out layer parameters, created before the recurrent cell
    with tf.variable_scope('kernl_output',initializer=tf.initializers.random_normal()) as scope:
        keRNL_weights = tf.get_variable(shape=[num_hidden, num_classes],name='output_weight')
        keRNL_biases = tf.get_variable(shape=[num_classes],name='output_addition')

    # define weights and inputs to the network
    X = tf.placeholder("float", [None, timesteps, num_input])
    Y = tf.placeholder("float", [None, num_classes])
    # build the network and collect the trainable variables together with their names
    keRNL_output,keRNL_states=kernl_rnn(X,keRNL_weights,keRNL_biases)
    trainables=tf.trainable_variables()
    variable_names=[v.name for v in tf.trainable_variables()]
    # find trainable parameters for keRNL 
    with tf.name_scope('KeRNL_Trainables') as scope:
        keRNL_output_weight_index= find_joing_index(trainables,'kernl','output_weight')
        keRNL_output_addition_index= find_joing_index(trainables,'kernl','output_addition')
        keRNL_temporal_filter_index= find_joing_index(trainables,'kernl','temporal_filter')
        keRNL_sensitivity_tensor_index= find_joing_index(trainables,'kernl','sensitivity_tensor')
        keRNL_kernel_index= find_joing_index(trainables,'kernl','kernel')
        keRNL_bias_index= find_joing_index(trainables,'kernl','bias')
        # variables trained with the tensor optimizer (dtype=int replaces the removed np.int alias)
        keRNL_tensor_training_indices=np.asarray([keRNL_sensitivity_tensor_index,keRNL_temporal_filter_index],dtype=int)
        keRNL_tensor_trainables= [trainables[k] for k in keRNL_tensor_training_indices]
        # variables trained with the weight optimizer
        keRNL_weight_training_indices=np.asarray([keRNL_kernel_index,keRNL_bias_index,keRNL_output_weight_index,keRNL_output_addition_index],dtype=int)
        keRNL_weight_trainables= [trainables[k] for k in keRNL_weight_training_indices]
     
    # define loss functions  
    ##################
    # keRNL train ####
    ##################
    with tf.name_scope("KeRNL_train") as scope:
        # outputs 
        keRNL_loss_output_prediction=tf.losses.softmax_cross_entropy(onehot_labels=Y,logits=keRNL_output)
        keRNL_prediction = tf.nn.softmax(keRNL_output)
        keRNL_correct_pred = tf.equal(tf.argmax(keRNL_prediction, 1), tf.argmax(Y, 1))
        keRNL_accuracy = tf.reduce_mean(tf.cast(keRNL_correct_pred, tf.float32))
        # states 
        # NOTE(review): this loss compares (h_hat - h) with Gamma @ S, whereas the update below is
        # built from Theta @ transpose(S) - (h_hat - h). The two look inconsistent, so the reported
        # loss may not be the quantity the tensor update minimises -- confirm against the cell.
        keRNL_loss_state_prediction=tf.losses.mean_squared_error(tf.subtract(keRNL_states.h_hat, keRNL_states.h),tf.matmul(keRNL_states.Gamma,trainables[keRNL_sensitivity_tensor_index]))
        # define optimizers 
        keRNL_weight_optimizer = tf.train.RMSPropOptimizer(learning_rate=weight_learning_rate)
        keRNL_tensor_optimizer = tf.train.RMSPropOptimizer(learning_rate=tensor_learning_rate)
        
        with tf.name_scope('KeRNL_train_tensors') as scope:
            # hand-built updates for the sensitivity tensor and the temporal filter, averaged over axis 0
            keRNL_delta_sensitivity=tf.subtract(tf.matmul(keRNL_states.Theta,tf.transpose(trainables[keRNL_sensitivity_tensor_index])),tf.subtract(keRNL_states.h_hat,keRNL_states.h))
            keRNL_sensitivity_tensor_update= tf.reduce_mean(tf.einsum("un,uv->unv",keRNL_delta_sensitivity,keRNL_states.Theta),axis=0)
            keRNL_temporal_filter_update= tf.reduce_mean(tf.multiply(tf.matmul(keRNL_delta_sensitivity,trainables[keRNL_sensitivity_tensor_index]),keRNL_states.Gamma),axis=0)
            # pairing order must match keRNL_tensor_training_indices: sensitivity tensor, temporal filter
            keRNL_tensor_grads_and_vars=list(zip([keRNL_sensitivity_tensor_update,keRNL_temporal_filter_update],keRNL_tensor_trainables))
            keRNL_cropped_tensor_grads_and_vars=_clip_non_output_gradients(keRNL_tensor_grads_and_vars, grad_clip)
            keRNL_tensor_train_op = keRNL_tensor_optimizer.apply_gradients(keRNL_cropped_tensor_grads_and_vars)
        
        with tf.name_scope('KeRNL_train_weights') as scope: 
            # output error mapped back through the read-out weights and the sensitivity tensor
            keRNL_grad_cost_to_output=tf.gradients(keRNL_loss_output_prediction,keRNL_output, name= 'keRNL_grad_cost_to_y')
            keRNL_error_in_hidden_state=tf.matmul(keRNL_grad_cost_to_output[-1],tf.transpose(trainables[keRNL_output_weight_index]))
            keRNL_delta_weight=tf.matmul(keRNL_error_in_hidden_state,trainables[keRNL_sensitivity_tensor_index]) 
            keRNL_weight_update_test=tf.einsum("un,unv->unv",keRNL_delta_weight,keRNL_states.eligibility_trace)
            keRNL_weight_update=tf.transpose(tf.reduce_mean(keRNL_weight_update_test,axis=0))
            
            keRNL_bias_update_test=tf.multiply(keRNL_delta_weight,keRNL_states.bias_trace)
            keRNL_bias_update=tf.reduce_mean(keRNL_bias_update_test,axis=0)
            # output layer: ordinary gradients of the classification loss
            keRNL_grad_cost_to_output_layer=tf.gradients(keRNL_loss_output_prediction,[trainables[keRNL_output_weight_index],trainables[keRNL_output_addition_index]], name= 'keRNL_grad_cost_to_output_layer')
            # crop the gradients (pairing order must match keRNL_weight_training_indices)
            keRNL_weight_grads_and_vars=list(zip([keRNL_weight_update,keRNL_bias_update,keRNL_grad_cost_to_output_layer[0],keRNL_grad_cost_to_output_layer[1]],keRNL_weight_trainables))
            keRNL_cropped_weight_grads_and_vars=_clip_non_output_gradients(keRNL_weight_grads_and_vars, grad_clip)
            # apply gradients 
            keRNL_weight_train_op = keRNL_weight_optimizer.apply_gradients(keRNL_cropped_weight_grads_and_vars)
    ##################
    # SUMMARIES ######
    ##################
    
    with tf.name_scope("keRNL_tensor_summaries") as scope: 
        # keRNL sensitivity tensor 
        tf.summary.histogram('keRNL_sensitivity_tensor_grad',keRNL_sensitivity_tensor_update+1e-10)
        tf.summary.histogram('keRNL_sensitivity_tensor',trainables[keRNL_sensitivity_tensor_index]+1e-10)
        # keRNL temporal filter 
        tf.summary.histogram('keRNL_temporal_filter_grad',keRNL_temporal_filter_update+1e-10)
        tf.summary.histogram('keRNL_temporal_filter',trainables[keRNL_temporal_filter_index]+1e-10)
        # keRNL loss 
        tf.summary.scalar('keRNL_loss_state_prediction',keRNL_loss_state_prediction+1e-10)
        # keRNL sensitivity tensor and temporal filter as images (batch and channel axes added)
        tf.summary.image('keRNL_sensitivity_tensor',tf.expand_dims(tf.expand_dims(trainables[keRNL_sensitivity_tensor_index],axis=0),axis=-1))
        tf.summary.image('keRNL_sensitivity_tensor_grad',tf.expand_dims(tf.expand_dims(keRNL_sensitivity_tensor_update,axis=0),axis=-1))
        tf.summary.image('keRNL_temporal_filter',tf.expand_dims(tf.expand_dims(tf.expand_dims(trainables[keRNL_temporal_filter_index],axis=0),axis=-1),axis=-1))
        tf.summary.image('keRNL_temporal_filter_grad',tf.expand_dims(tf.expand_dims(tf.expand_dims(keRNL_temporal_filter_update,axis=0),axis=-1),axis=-1))
        # merge only the summaries created under this name scope
        keRNL_tensor_merged_summary_op=tf.summary.merge_all(scope="keRNL_tensor_summaries")
        
    with tf.name_scope("keRNL_weight_summaries") as scope: 
        # keRNL kernel
        tf.summary.histogram('keRNL_kernel_grad',keRNL_weight_update+1e-10)
        tf.summary.histogram('keRNL_kernel',trainables[keRNL_kernel_index]+1e-10)
        # keRNL bias 
        tf.summary.histogram('keRNL_bias_grad',keRNL_bias_update+1e-10)
        tf.summary.histogram('keRNL_bias',trainables[keRNL_bias_index]+1e-10)
        # keRNL output weight
        tf.summary.histogram('keRNL_output_weight_grad',keRNL_grad_cost_to_output_layer[0]+1e-10)
        tf.summary.histogram('keRNL_output_weights', trainables[keRNL_output_weight_index]+1e-10)
        # keRNL output bias
        tf.summary.histogram('keRNL_output_addition_grad',keRNL_grad_cost_to_output_layer[1]+1e-10)
        tf.summary.histogram('keRNL_output_addition', trainables[keRNL_output_addition_index]+1e-10)
        # keRNL loss 
        tf.summary.scalar('keRNL_loss_output_prediction',keRNL_loss_output_prediction+1e-10)
        tf.summary.scalar('keRNL_accuracy',keRNL_accuracy)
        # keRNL kernel and output weight as images (batch and channel axes added)
        tf.summary.image('keRNL_kernel',tf.expand_dims(tf.expand_dims(trainables[keRNL_kernel_index],axis=0),axis=-1))
        tf.summary.image('keRNL_kernel_grad',tf.expand_dims(tf.expand_dims(keRNL_weight_update,axis=0),axis=-1))
        tf.summary.image('keRNL_output_weight',tf.expand_dims(tf.expand_dims(trainables[keRNL_output_weight_index],axis=0),axis=-1))
        tf.summary.image('keRNL_output_weight_grad',tf.expand_dims(tf.expand_dims(keRNL_grad_cost_to_output_layer[0],axis=0),axis=-1))
        # merge only the summaries created under this name scope
        keRNL_weight_merged_summary_op=tf.summary.merge_all(scope="keRNL_weight_summaries")
    
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()


In [None]:
# verify initialization: initialise the variables once and list every trainable
# variable with its name, the position of 'output' in that name, and its shape
with tf.Session(graph=graph,config=tf.ConfigProto(log_device_placement=True)) as sess : 
    sess.run(init)
    # fetching by name and fetching the variable objects both return the variable values
    values,trainable_vars = sess.run([variable_names,trainables])
    for k, v in zip(variable_names,values):
        print(["variable: " , k])
        # str.find replaces np.unicode_.find (alias removed in NumPy 2.0); -1 means 'output'
        # is absent from the name, i.e. the gradient of that variable gets clipped
        print(["index of 'output': " , k.find('output')]) 
        print(["shape: " , v.shape])

In [None]:
# create the log directory (and any missing parents), then drop leftover "*.local" files
os.makedirs(log_dir, exist_ok=True)
filelist = []
for entry in os.listdir(log_dir):
    if entry.endswith(".local"):
        filelist.append(entry)
for f in filelist:
    stale_path = os.path.join(log_dir, f)
    os.remove(stale_path)

In [None]:
# write graph into tensorboard 
tb_writer = tf.summary.FileWriter(log_dir,graph)
try:
    # run a training session 
    with tf.Session(graph=graph) as sess:
        sess.run(init)
        for step in range(1,training_steps+1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # present every image as a sequence of `timesteps` inputs of width `num_input`
            batch_x=batch_x.reshape((batch_size,timesteps,num_input))
            feed = {X: batch_x, Y: batch_y}

            # keRNL state train: update the variables handled by the tensor optimizer
            keRNL_state_train, keRNL_tensor_loss=sess.run([keRNL_tensor_train_op,keRNL_loss_state_prediction],feed_dict=feed)

            # keRNL weight train: update the variables handled by the weight optimizer
            keRNL_weight_train, keRNL_loss,keRNL_accu=sess.run([keRNL_weight_train_op,keRNL_loss_output_prediction,keRNL_accuracy],feed_dict=feed)

            # run summaries (separate evaluations of the graph on the same batch)
            keRNL_tensor_merged_summary=sess.run(keRNL_tensor_merged_summary_op,feed_dict=feed)
            keRNL_weight_merged_summary=sess.run(keRNL_weight_merged_summary_op,feed_dict=feed)

            tb_writer.add_summary(keRNL_tensor_merged_summary, global_step=step)
            tb_writer.add_summary(keRNL_weight_merged_summary, global_step=step)

            if step % display_step==0 or step==1 : 
                # report batch loss and accuracy; `step` is already 1-based, so it is printed
                # as-is and matches the global_step written to TensorBoard
                print('Step: {}, keRNL tensor Loss {:.9f}, keRNL train Loss: {:.9f},keRNL accu: {:.9f}'.format(step, keRNL_tensor_loss,keRNL_loss,keRNL_accu))

        print("Optimization Finished!")
        # TODO: evaluation on held-out data is not implemented; sketch kept for reference
        #test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
        #test_label = mnist.test.labels[:test_len]
        #print("Testing Accuracy:", 
        #    sess.run(keRNL_accuracy, feed_dict={X: test_data, Y: test_label}))
        # training_steps equals the final value of `step` and stays defined when the loop never runs
        save_path = saver.save(sess, log_dir+"/model.ckpt", global_step=training_steps,write_meta_graph=True)
        print("Model saved in path: %s" % save_path)
finally:
    # flush pending events and release the event file even if training is interrupted
    tb_writer.close()