In [1]:
import tensorflow as tf
import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
from IPython import display
from keras.utils import to_categorical
from sklearn import datasets
import sys
sys.path.append('../scripts/')
from utils import *

Using TensorFlow backend.


In [57]:
n_residual_layers = 0
n_fisher_layers = 1
n_back_residuals = 4
np.random.seed(99)
n_dim = 2
fisher_loss = 0

flavor = 'previous'
layer_no = 2


#Hyperparameter to control the amount of loss. (Roughly kept for Fisher Loss to match the scale of Cross-entropy Loss.)
lam = 1/1000
input_dim = 2
output_dim = 2

tf.reset_default_graph()
x = tf.placeholder(tf.float32, shape=[None, 2], name='input')
y_ = tf.placeholder(tf.float32, shape=[None, 2], name='output')
weights = []
biases = []
hiddens = []

previous_weights = []
previous_biases = []

time_gradients = False
reduce_dimension = True


#Manually one layer written for different dimension data set
manual = 1
if reduce_dimension:
    with tf.name_scope('Reduce_Dimension_Layer'):
        weights.append(weight_variable([input_dim,n_dim], name='feed_in_weight'))
        biases.append(bias_variable([n_dim], name='feed_in_bias'))    
        hiddens.append(tf.nn.relu(tf.add(tf.matmul(x,weights[0]), biases[0])))
        #hiddens[-1] = tf.add(x, hiddens[-1], name='residual_0')
        print('Manual single layer done.')

        


if n_residual_layers:
    with tf.name_scope('Pre_Residual_Layer'):
        for i in range(manual, n_residual_layers+manual):
            weights.append(weight_variable([n_dim,n_dim], name='weight_residual_'+str(i)))
            biases.append(bias_variable([n_dim], name='bias_residual_'+str(i)))
            #No Residuals
            hiddens.append(tf.nn.tanh(tf.add(tf.matmul(hiddens[-1],weights[-1]), biases[-1])))
            #If residuals needed
            hiddens[-1] = tf.add(hiddens[-2], hiddens[-1], name='residual'+str(i))
            print('Residual Layers done', i)

if n_fisher_layers:
    F_accum_weights = []
    F_accum_biases = []
    layer_loss = []
    with tf.name_scope('Fisher_Layer'):
        for i in range(manual+n_residual_layers, n_fisher_layers+n_residual_layers+manual):
            weights.append(weight_variable([n_dim,n_dim], name='fisher_weight_'+str(i)))
            biases.append(bias_variable([n_dim], name='fisher_bias_'+str(i)))
            
            """If doing time-wise change in weights, need to create a copy of previous weights"""
            if time_gradients:
                previous_weights.append(tf.Variable(tf.zeros_like(weights[0]), trainable=False))
                previous_biases.append(tf.Variable(tf.zeros_like(biases[0]), trainable=False))

            #No Residuals
            
            """CHANGE IT BACK TO hiddens[-1]"""
            #hiddens.append(tf.nn.tanh(tf.add(tf.matmul(x ,weights[-1]), biases[-1]), name='fisher_'+str(i))) #For direct
            
            hiddens.append(tf.nn.tanh(tf.add(tf.matmul(hiddens[-1],weights[-1]), biases[-1]), name='fisher_'+str(i))) #With dim_reduction
            
            
            #If residuals needed
            """CHANGE IT BACK TO hiddens[-2]"""
            #hiddens[-1] = tf.add(hiddens[-2], hiddens[-1])
            print('Fisher Layers done', i)
        
if n_back_residuals:
    with tf.name_scope('Post_Residual_Layer'):
        for i in range(manual+n_fisher_layers+n_residual_layers, n_fisher_layers+n_residual_layers+n_back_residuals+manual):
            weights.append(weight_variable([n_dim,n_dim], name='residual_weight_'+str(i)))
            biases.append(bias_variable([n_dim], name='residual_bias_'+str(i)))
            #No Residuals
            hiddens.append(tf.nn.tanh(tf.add(tf.matmul(hiddens[-1],weights[-1]), biases[-1])))
            #If residuals needed
            hiddens[-1] = tf.add(hiddens[-2], hiddens[-1], name='residual_'+str(i))
            print('Back Residual Layers done', i)

            
weights.append(weight_variable([n_dim,output_dim], name='final_weight'))
biases.append(bias_variable([output_dim], name='final_bias'))
#No Residuals
hiddens.append(tf.nn.tanh(tf.add(tf.matmul(hiddens[-1],weights[-1]), biases[-1])))


"""Ops for copying current weights to previous weights variable"""
# Ops needed if we are going to try the time flavour of FishNet. 
if time_gradients:
    copy_weights = []
    copy_biases = []
    for i in range(n_fisher_layers):
        copy_weights.append(previous_weights[i].assign(weights[1+n_residual_layers+i]))
        copy_biases.append(previous_biases[i].assign(biases[1+n_residual_layers+i])) 
        print('Copied from', 1+n_residual_layers+i, 'to ', i)

        
with tf.name_scope('final_output'):
    y = hiddens[-1]
    #y = tf.add(tf.matmul(hiddens[-1],weights[-1]),biases[-1]) # output layer
with tf.name_scope('xtropy_loss'):
    cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y))
    tf.summary.scalar(name='xtropy_loss', tensor=cross_entropy_loss)

    
#Have a seperate optimizer for Fisher Information from each layer.
if n_fisher_layers:
    print(hiddens[4])
    for i in range(manual+n_residual_layers, n_fisher_layers+n_residual_layers+manual):
        with tf.name_scope('layer_fisher'+str(i)):
            #Calculate gradients 
            #Here, sampling is done from outputs of previous layer and weights of next layer. 

            ######Previous Code#######
            #ders_weights = tf.gradients(hiddens[i], weights[i])
            #ders_biases = tf.gradients(hiddens[i], biases[i])

            #Code for sampling from different layers              
            #samples = []
            if flavor == 'previous':
                samples = hiddens[i]
                print(hiddens[i])
            elif flavor == 'different':
                samples = hiddens[len(hiddens)-1]
                print(hiddens[len(hiddens)-1])

            #######################################
            ders_weights = tf.gradients(tf.convert_to_tensor(samples), weights[i])
            ders_biases = tf.gradients(tf.convert_to_tensor(samples), biases[i])

            F_accum_weights.append([tf.square(ders_weights)])
            F_accum_biases.append([tf.square(ders_biases)])

            weight_sqrs = tf.square(weights[i] - weights[i-1])
            bias_sqrs = tf.square(biases[i] - biases[i-1])

            """If doing time-wise change, calculate Fisher of weighs across epochs"""
            if time_gradients:
                weight_sqrs = tf.square(weights[i] - previous_weights[i-1-n_residual_layers])
                print('Subing weights',i,'-',i-1-n_residual_layers)
                bias_sqrs = tf.square(biases[i] - previous_biases[i-1-n_residual_layers])

            weight_leftout = tf.multiply(F_accum_weights[-1], weight_sqrs)
            bias_leftout = tf.multiply(F_accum_biases[-1], bias_sqrs)

            layer_loss.append((lam/2) * tf.reduce_sum(weight_leftout + bias_leftout, name='layer_loss_'+str(i)))#tf.multiply(F_accum[i-1],tf.square(var_list[i] - var_list[i-1]))))
        print('Loss Graph done', i)

    with tf.name_scope('total_fisher_loss'):
        fisher_loss += tf.reduce_sum(tf.convert_to_tensor(layer_loss))
        tf.summary.scalar(name='fisher_loss', tensor=fisher_loss)
    
    with tf.name_scope('total_loss'):
        total_loss = tf.add(fisher_loss, cross_entropy_loss)
        tf.summary.scalar(name='total_loss', tensor=total_loss)
        
# You can play around here to try a different optimizer for the fisher information or keep it part of total loss. Here optimization only on the Fisher Layers. 
    train_fisher_step = tf.train.AdamOptimizer(0.0001).minimize(fisher_loss, var_list=[weights[manual+n_residual_layers: n_fisher_layers+n_residual_layers+manual], biases[manual+n_residual_layers: n_fisher_layers+n_residual_layers+manual]])
#with tf.name_scope('optimization'):



#train_all_step = tf.train.AdamOptimizer(0.01).minimize(cross_entropy_loss, var_list=[weights, biases])


#train all weight except fisher layer weights
train_all_step = tf.train.AdamOptimizer(0.01).minimize(cross_entropy_loss, var_list=[weights[0:manual+n_residual_layers], weights[1+n_fisher_layers+n_residual_layers+manual:], biases[0:manual+n_residual_layers], biases[1+n_fisher_layers+n_residual_layers+manual:]])



#train_all_step = tf.train.AdamOptimizer(0.001).minimize(total_loss, var_list=[weights, biases])
    
    
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar(name='accuracy', tensor=accuracy)
summ_op = tf.summary.merge_all()

Manual single layer done.
Fisher Layers done 1
Back Residual Layers done 2
Back Residual Layers done 3
Back Residual Layers done 4
Back Residual Layers done 5
Tensor("Post_Residual_Layer/residual_4:0", shape=(?, 2), dtype=float32)
Tensor("Fisher_Layer/fisher_1:0", shape=(?, 2), dtype=float32)
Loss Graph done 1


In [45]:
#tf.get_collection(tf.GraphKeys.LOSSES, scope='Fisher_Layer')
print(len(weights[1+n_residual_layers: n_fisher_layers+n_residual_layers+1]), len(biases[1+n_residual_layers: n_fisher_layers+n_residual_layers+1]))
print(len(weights[0:manual+n_residual_layers]), len(weights[1+n_fisher_layers+n_residual_layers+manual:]) , len(biases[0:manual+n_residual_layers]))


1 1
3 3 3


In [58]:
from sklearn import datasets
from sklearn.utils import resample
import numpy as np
n_samples = 2000

#Play with different datasets.

#feat, labels = datasets.make_classification(n_features=2, n_classes=2, n_redundant=0, n_informative=1, random_state=1, n_clusters_per_class=1, n_samples=n_samples)
feat, labels = datasets.make_moons(n_samples=n_samples, noise=0.05)
#feat, labels = datasets.make_circles(n_samples=400, factor=.3, noise=.05)
#feat, labels = datasets.make_blobs(n_samples=n_samples)


new_feat, new_labels = resample(feat, labels, n_samples=10000, replace=True)


new_labels = to_categorical(new_labels, num_classes=2)

#labels = labels.reshape([-1,1])
#print(feat.shape, labels.shape)
#((x,y) for zip(feat,labels) if labels==1)
#feat_small = feat[np.where(labels==1)]
# lab_small = np.zeros(feat_small.shape[0])
# lab_small = to_categorical(lab_small, num_classes=1)
# print(feat_small.shape, lab_small.shape)
#feat_small, label = ((x,y) for x,y in zip(feat,labels) if y==1)


#feat, labels = datasets.make_blobs(n_samples=n_samples)
labels = to_categorical(labels, num_classes=2)
print(feat.shape, labels.shape)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

#_, summar, accu = sess.run([train_step, summ_op, accuracy], feed_dict={x: feat, y_: labels})


#for i in len(copy_weights):
#    sess.run([copy_weights[i], copy_biases[i]])

writer = tf.summary.FileWriter('./fishnetlog', sess.graph)

for i in range(3000):
    if n_fisher_layers:
        _, _ , summar, accu = sess.run([train_all_step,train_fisher_step, summ_op, accuracy], feed_dict={x: new_feat, y_: new_labels})
    else:
        _ , summar, accu = sess.run([train_all_step, summ_op, accuracy], feed_dict={x: new_feat, y_: new_labels})
    #if n_fisher_layers:
    #    _ = sess.run([train_fisher_step], feed_dict={x: new_feat, y_: new_labels})
    
    #for j in range(len(copy_weights)):
    #    sess.run([copy_weights[j], copy_biases[j]])
    writer.add_summary(summar, i)
    if (i%1000 == 0):
        print('Step : ',i,' Accuracy : ',accu)
    #print(accu)
    #model.train_step.run(feed_dict={x: feat, y_: labels})
    #print(sess.run([accuracy], feed_dict={x: feat, y_: labels}))
activations = []
activations.append(sess.run(x, feed_dict={x:new_feat}))
for i in hiddens:
    activations.append(sess.run(i, feed_dict={x:new_feat}))
morphs(activations, np.argmax(new_labels, axis=1), skip=1, last_layer=True)

(2000, 2) (2000, 2)
Step :  0  Accuracy :  0.4988
Step :  1000  Accuracy :  0.8838
Step :  2000  Accuracy :  0.8851
This is the format of your plot grid:
[ (1,1) x1,y1 ]     [ (1,2) x2,y2 ]     [ (1,3) x3,y3 ]     [ (1,4) x4,y4 ]     [ (1,5) x5,y5 ]     [ (1,6) x6,y6 ]     [ (1,7) x7,y7 ]     [ (1,8) x8,y8 ]     [ (1,9) x9,y9 ]     [ (1,10) x10,y10 ]



In [86]:
saver = tf.train.Saver()
saver.save(sess, './model/')

'./model/'

In [69]:
print(len(hiddens))

7


In [52]:
mesh = [(a,b) for a in np.arange(-10, 10, 1) for b in np.arange(-10, 10, 1)]
mesh = np.asarray(mesh)
print(mesh.shape)
def morphs_nolab(activations, skip=2, last_layer=False):
    #If last dimension is something other than 2, change it. otherwise 0.
    trace_list = []
    n_rows = 1 if ((len(activations)) < 10) else -(-len(activations)//10)
    fig = tools.make_subplots(rows=n_rows, cols=(10))
    last_dim = (2 if last_layer==False else 0)
    for i in range(1, len(activations)):
        #print((-(-i//10)), i%10, i-1, (i//2)+1)
        #        fig.append_trace(go.Scatter(x=activations[i-1][:,0], y = activations[i-1][:,1] , mode='markers'), -(-i//10), (10 if i%10==0 else i%10))
        temp_fig = ff.create_quiver(activations[i-1][:,0], activations[i-1][:,1], activations[i][:,0], activations[i][:,1],line=dict(width=1))
        fig.append_trace(temp_fig['data'][0], -(-i//10), (10 if i%10==0 else i%10))
    fig['layout'].update(height=n_rows*400)
    fig['layout'].update(width=3000)
    plot(fig)

meshVelocities = []
for layer in hiddens:
    meshVelocities.append(sess.run(layer, feed_dict={x:mesh}))
#for layer in range(len(model.layers)):
#    extract_func = get_layer(0,layer, model)
#    meshVelocities.append(extract_func([mesh])[0])
#print(len(meshVelocities))
morphs_nolab(meshVelocities, skip=1, last_layer=True)

(400, 2)
This is the format of your plot grid:
[ (1,1) x1,y1 ]     [ (1,2) x2,y2 ]     [ (1,3) x3,y3 ]     [ (1,4) x4,y4 ]     [ (1,5) x5,y5 ]     [ (1,6) x6,y6 ]     [ (1,7) x7,y7 ]     [ (1,8) x8,y8 ]     [ (1,9) x9,y9 ]     [ (1,10) x10,y10 ]

