In [1]:
import tensorflow as tf
import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
from IPython import display
from keras.utils import to_categorical
from sklearn import datasets
import sys
sys.path.append('../scripts/')
from utils import *
import plotly.figure_factory as ff
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from sklearn import datasets
from sklearn.utils import resample
import numpy as np
# Number of points requested from the scikit-learn dataset generators in the cells below.
n_samples = 2000

Using TensorFlow backend.


In [2]:
""" Hyperparameters """
# Seed NumPy's global generator so anything drawn from it later is repeatable.
np.random.seed(99)

# Depth of each stage: residual layers ahead of the Fisher layers, the Fisher
# layers themselves, and the residual layers that follow them.
n_residual_layers, n_fisher_layers, n_back_residuals = 1, 1, 1

# Where the Fisher samples come from:
#   'previous'  -> sample from the previous layer.
#   'different' -> sample from a different layer; layer_no is meant to name that layer.
flavor, layer_no = 'previous', 2

# Scale applied to the Fisher loss (roughly chosen so that it matches the scale
# of the cross-entropy loss).
lam = 5000
# Per-layer lambda alternatives, kept for reference:
#   lam_array = np.linspace(1/20, 1/8000, num=(n_residual_layers+n_fisher_layers+n_back_residuals))  # linear decrease
#   lam_array = [1/20, 1/2000, 1/5000, 1/6000, 1/8000]

# Width of the network input and output.
input_dim = output_dim = 2

# Fisher loss accumulator; starts at 0 and is added to when the loss graph is built.
fisher_loss = 0

# Per-layer containers filled in by the graph-building cell (three distinct lists).
weights, biases, hiddens = [], [], []

# "Time" variant: when enabled, copies of earlier parameters are stored in previous_*.
time_gradients = False
previous_weights, previous_biases = [], []

# Optional dimension-reducing first layer, and the width shared by the hidden layers.
reduce_dimension = False
n_dim = 2

In [3]:
# Build the graph for the Fisher network. Non-Fisher layers are n_dim x n_dim
# sigmoid layers with a residual (skip) connection from the previous layer.
tf.reset_default_graph()

# The default graph was just discarded, so anything collected by an earlier run
# of this cell would belong to the old graph. Start from empty containers (and a
# zero Fisher loss) so this cell can be re-run on its own without duplicating
# layers or mixing tensors from two graphs.
weights, biases, hiddens = [], [], []
previous_weights, previous_biases = [], []
fisher_loss = 0

# Placeholders follow the configured widths instead of a hard-coded 2.
x = tf.placeholder(tf.float32, shape=[None, input_dim], name='input')
y_ = tf.placeholder(tf.float32, shape=[None, output_dim], name='output')

# `manual` is 1 when the hand-written input layer below exists and 0 otherwise;
# the later layer loops use it as an index offset.
manual = 0
if reduce_dimension:
    manual = 1
    with tf.name_scope('Reduce_Dimension_Layer'):
        weights.append(weight_variable([input_dim, n_dim], name='feed_in_weight'))
        biases.append(bias_variable([n_dim], name='feed_in_bias'))
        hiddens.append(tf.nn.sigmoid(tf.add(tf.matmul(x, weights[0]), biases[0])))
        # Skip connection from the raw input. NOTE(review): adding x (width
        # input_dim) to the layer output (width n_dim) only works when the two
        # widths are broadcast-compatible - confirm this is intended when
        # actually reducing the dimension.
        hiddens[-1] = tf.add(x, hiddens[-1], name='residual_0')
        print('Manual single layer done.')
        
# Residual layers placed ahead of the Fisher layers.
if n_residual_layers:
    with tf.name_scope('Pre_Residual_Layer'):
        for layer_idx in range(manual, manual + n_residual_layers):
            weights.append(weight_variable([n_dim, n_dim], name='weight_residual_' + str(layer_idx)))
            biases.append(bias_variable([n_dim], name='bias_residual_' + str(layer_idx)))
            # The very first layer reads the input placeholder; every later
            # one reads the output of the layer before it.
            if hiddens:
                layer_input = hiddens[-1]
                skip_name = 'residual' + str(layer_idx)
            else:
                layer_input = x
                skip_name = 'residual_0'
            activation = tf.nn.sigmoid(tf.add(tf.matmul(layer_input, weights[-1]), biases[-1]))
            # Residual connection: layer input plus its sigmoid activation.
            hiddens.append(tf.add(layer_input, activation, name=skip_name))
            print('Residual Layers done', layer_idx)

# Fisher layers: plain sigmoid layers (no skip connection) whose parameters are
# later penalised by the Fisher loss.
if n_fisher_layers:
    # Filled in while the Fisher loss graph is built further down.
    F_accum_weights = []
    F_accum_biases = []
    layer_loss = []
    with tf.name_scope('Fisher_Layer'):
        for i in range(manual+n_residual_layers, n_fisher_layers+n_residual_layers+manual):
            weights.append(weight_variable([n_dim,n_dim], name='fisher_weight_'+str(i)))
            biases.append(bias_variable([n_dim], name='fisher_bias_'+str(i)))

            # For the time-wise variant keep a non-trainable copy slot per Fisher
            # layer. It is shaped like THIS layer's parameters (not like layer 0,
            # whose weight is [input_dim, n_dim] when reduce_dimension is on).
            if time_gradients:
                previous_weights.append(tf.Variable(tf.zeros_like(weights[-1]), trainable=False))
                previous_biases.append(tf.Variable(tf.zeros_like(biases[-1]), trainable=False))

            # Read from the previous layer if there is one, otherwise from the input.
            layer_input = hiddens[-1] if hiddens else x
            hiddens.append(tf.nn.sigmoid(tf.add(tf.matmul(layer_input, weights[-1]), biases[-1]), name='fisher_'+str(i)))
            # No residual connection here. To re-enable it (original author's
            # "CHANGE IT BACK" note):
            #hiddens[-1] = tf.add(layer_input, hiddens[-1])
            print('Fisher Layers done', i)

# Residual layers placed after the Fisher layers.
if n_back_residuals:
    with tf.name_scope('Post_Residual_Layer'):
        first_back = manual + n_fisher_layers + n_residual_layers
        for layer_idx in range(first_back, first_back + n_back_residuals):
            weights.append(weight_variable([n_dim, n_dim], name='residual_weight_' + str(layer_idx)))
            biases.append(bias_variable([n_dim], name='residual_bias_' + str(layer_idx)))
            layer_input = hiddens[-1]
            activation = tf.nn.sigmoid(tf.add(tf.matmul(layer_input, weights[-1]), biases[-1]))
            # Residual connection: previous layer output plus this activation.
            hiddens.append(tf.add(layer_input, activation, name='residual_' + str(layer_idx)))
            print('Back Residual Layers done', layer_idx)


# Output layer: n_dim -> output_dim sigmoid layer without a skip connection.
# NOTE(review): its sigmoid output is what the loss cell passes on as `logits`.
weights.append(weight_variable([n_dim, output_dim], name='final_weight'))
biases.append(bias_variable([output_dim], name='final_bias'))
final_pre_activation = tf.add(tf.matmul(hiddens[-1], weights[-1]), biases[-1])
hiddens.append(tf.nn.sigmoid(final_pre_activation))

"""Time flavour net ops for copying current weights to previous weights variable"""
# Ops used by the time flavour of FishNet: each one stores the current
# parameters of a Fisher layer into its non-trainable previous_* variable.
if time_gradients:
    copy_weights = []
    copy_biases = []
    for i in range(n_fisher_layers):
        # The Fisher layers are created at indices starting from
        # manual + n_residual_layers, so use the same offset here. A fixed
        # offset of 1 would address the wrong layer whenever manual == 0.
        source_idx = manual + n_residual_layers + i
        copy_weights.append(previous_weights[i].assign(weights[source_idx]))
        copy_biases.append(previous_biases[i].assign(biases[source_idx]))
        print('Copied from', source_idx, 'to ', i)

        
# The network output is simply the last entry collected in `hiddens`.
with tf.name_scope('final_output'):
    y = hiddens[-1]
    # Alternative output (disabled): y = tf.add(tf.matmul(hiddens[-1],weights[-1]),biases[-1])

# Mean softmax cross-entropy between the one-hot targets and the output,
# also logged as a scalar summary.
with tf.name_scope('xtropy_loss'):
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y)
    cross_entropy_loss = tf.reduce_mean(per_example_xent)
    tf.summary.scalar('xtropy_loss', cross_entropy_loss)

    
# The forward graph is done. Build the Fisher-information loss for the Fisher
# layers, the combined loss, and an optimiser that only touches the Fisher layers.
if n_fisher_layers:
    # Index range occupied by the Fisher layers in weights / biases / hiddens.
    fisher_start = manual + n_residual_layers
    fisher_stop = fisher_start + n_fisher_layers

    for i in range(fisher_start, fisher_stop):
        with tf.name_scope('layer_fisher'+str(i)):
            # Earlier version, kept for reference:
            #ders_weights = tf.gradients(hiddens[i], weights[i])
            #ders_biases = tf.gradients(hiddens[i], biases[i])

            print(len(hiddens))
            if flavor == 'previous':
                # Negative log-softmax of this Fisher layer's own output. The
                # index follows the loop so that every Fisher layer is sampled
                # from its own activations rather than always from hiddens[1].
                samples = -tf.log(tf.nn.softmax(hiddens[i]))
                print(hiddens[i])
            elif flavor == 'different':
                # NOTE(review): this always takes the LAST layer; `layer_no` is
                # not consulted here - confirm whether hiddens[layer_no] was meant.
                samples = hiddens[len(hiddens)-1]
                print(hiddens[len(hiddens)-1])
            else:
                # Fail loudly instead of continuing with an undefined (or stale,
                # on a re-run) `samples`.
                raise ValueError("flavor must be 'previous' or 'different', got %r" % (flavor,))

            # Gradients of the samples w.r.t. this layer's parameters; their
            # squares are the Fisher terms.
            ders_weights = tf.gradients(tf.convert_to_tensor(samples), weights[i])
            ders_biases = tf.gradients(tf.convert_to_tensor(samples), biases[i])

            F_accum_weights.append([tf.square(ders_weights)])
            F_accum_biases.append([tf.square(ders_biases)])

            # Squared parameter difference to the preceding layer. These are only
            # consumed by the disabled "Actual Fisher Loss" variant below.
            weight_sqrs = tf.square(weights[i] - weights[i-1])
            bias_sqrs = tf.square(biases[i] - biases[i-1])

            # Time-wise variant (disabled): difference to the stored previous parameters.
            #if time_gradients:
            #    weight_sqrs = tf.square(weights[i] - previous_weights[i-1-n_residual_layers])
            #    print('Subing weights',i,'-',i-1-n_residual_layers)
            #    bias_sqrs = tf.square(biases[i] - previous_biases[i-1-n_residual_layers])

            weight_leftout = tf.multiply(F_accum_weights[-1], weight_sqrs)
            bias_leftout = tf.multiply(F_accum_biases[-1], bias_sqrs)

            # Active loss: lam times the summed squared gradients, without the
            # difference between layers.
            layer_loss.append((lam) * tf.reduce_sum(tf.reduce_sum(F_accum_weights[-1]) + tf.reduce_sum(F_accum_biases[-1]), name='layer_loss_'+str(i)))

            # Variant with a per-layer lambda (needs lam_array):
            #layer_loss.append(lam_array[i-(manual+n_residual_layers)] * tf.reduce_sum(tf.reduce_sum(F_accum_weights[-1]) + tf.reduce_sum(F_accum_biases[-1]), name='layer_loss_'+str(i)))
            # Variant "Actual Fisher Loss":
            #layer_loss.append((lam) * tf.reduce_sum(weight_leftout + bias_leftout, name='layer_loss_'+str(i)))
        print('Loss Graph done', i)

    with tf.name_scope('total_fisher_loss'):
        # Plain assignment: the value equals 0 + sum, but it no longer depends on
        # `fisher_loss` having been reset to 0 by another cell before each run.
        fisher_loss = tf.reduce_sum(tf.convert_to_tensor(layer_loss))
        tf.summary.scalar(name='fisher_loss', tensor=fisher_loss)

    # Combined objective: cross-entropy plus the Fisher terms of every layer.
    with tf.name_scope('total_loss'):
        total_loss = tf.add(fisher_loss, cross_entropy_loss)
        tf.summary.scalar(name='total_loss', tensor=total_loss)

    # Experiment-dependent: here the Fisher loss gets its own optimiser that only
    # updates the Fisher layers. var_list is passed as one flat list of variables.
    fisher_vars = weights[fisher_start:fisher_stop] + biases[fisher_start:fisher_stop]
    train_fisher_step = tf.train.AdamOptimizer(0.01).minimize(fisher_loss, var_list=fisher_vars)
#with tf.name_scope('optimization'):

# Main optimiser: minimise the cross-entropy over the parameters of every layer.
train_all_step = tf.train.AdamOptimizer(0.01).minimize(cross_entropy_loss, var_list=[weights, biases])

# Alternative (disabled): train everything except the Fisher layer parameters.
#train_all_step = tf.train.AdamOptimizer(0.01).minimize(cross_entropy_loss, var_list=[weights[0:manual+n_residual_layers], weights[1+n_fisher_layers+n_residual_layers+manual:], biases[0:manual+n_residual_layers], biases[1+n_fisher_layers+n_residual_layers+manual:]])

# Alternative (disabled): minimise the combined loss instead.
#train_all_step = tf.train.AdamOptimizer(0.01).minimize(total_loss, var_list=[weights, biases])

# Accuracy: fraction of rows whose arg-max output matches the arg-max target.
predicted_class = tf.argmax(y, 1)
target_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, target_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', accuracy)

# Single op that evaluates every scalar summary registered so far.
summ_op = tf.summary.merge_all()

Residual Layers done 0
Fisher Layers done 1
Back Residual Layers done 2
4
Tensor("Fisher_Layer/fisher_1:0", shape=(?, 2), dtype=float32)
Loss Graph done 1


In [4]:
# Build the training data. Swap the generator line to try a different dataset.

# Alternative generators (disabled):
#feat, labels = datasets.make_classification(n_features=2, n_classes=2, n_redundant=0, n_informative=1, random_state=1, n_clusters_per_class=1, n_samples=n_samples)
#feat, labels = datasets.make_circles(n_samples=400, factor=.3, noise=.05)
#feat, labels = datasets.make_blobs(n_samples=n_samples, centers=2)
#feat, labels = datasets.make_blobs(n_samples=n_samples,cluster_std=[1.0, 2.0], centers=[(0,0), (7,0)])

feat, labels = datasets.make_moons(n_samples=n_samples, noise=0.05)

# Bootstrap (sample with replacement) up to 10000 rows and one-hot encode the labels.
new_feat, new_labels = resample(feat, labels, n_samples=10000, replace=True)
new_labels = to_categorical(new_labels, num_classes=2)

# Same treatment restricted to the points of class 1 ...
is_one = labels == 1
onefeat = feat[is_one]
onelabel = labels[is_one]
new_1feat, new_1labels = resample(onefeat, onelabel, n_samples=10000, replace=True)
new_1labels = to_categorical(new_1labels, num_classes=2)

# ... and to the points of class 0. The labels encoded here must be the class-0
# labels that were just resampled; encoding new_1labels (already one-hot) would
# give wrong labels with an extra dimension.
is_zero = labels == 0
zerofeat = feat[is_zero]
zerolabel = labels[is_zero]
new_0feat, new_0labels = resample(zerofeat, zerolabel, n_samples=10000, replace=True)
new_0labels = to_categorical(new_0labels, num_classes=2)

In [5]:
""""Optimization"""
# Open a session, initialise all variables and log summaries to ./fishnetlog.
# The session is left open on purpose: later cells keep using `sess`.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter('./fishnetlog', sess.graph)

# Every step trains on the same full resampled set, so build the feed once.
train_feed = {x: new_feat, y_: new_labels}
for i in range(4000):
    if n_fisher_layers:
        # Run the cross-entropy step and the Fisher-only step in the same call.
        _, _ , summar, accu = sess.run([train_all_step, train_fisher_step, summ_op, accuracy], feed_dict=train_feed)
        # Alternatives (disabled): run the Fisher step separately on class-0 data.
        #_, accu = sess.run([train_all_step, accuracy], feed_dict={x: new_feat, y_: new_labels})
        #_ = sess.run([train_fisher_step], feed_dict={x: new_0feat, y_: new_0labels})
    else:
        _ , summar, accu = sess.run([train_all_step, summ_op, accuracy], feed_dict=train_feed)
    writer.add_summary(summar, i)
    if (i%1000 == 0):
        print('Step : ',i,' Accuracy : ',accu)
# Make sure the buffered summary events reach the event file.
writer.flush()

# Plot the data-space transformation: the input followed by every layer's
# output, all evaluated on the training features in a single run.
activationsplot = sess.run([x] + hiddens, feed_dict={x: new_feat})
morphs(activationsplot, np.argmax(new_labels, axis=1), skip=1, last_layer=True)

Step :  0  Accuracy :  0.4988
Step :  1000  Accuracy :  0.9836
Step :  2000  Accuracy :  0.9883
Step :  3000  Accuracy :  0.9883
This is the format of your plot grid:
[ (1,1) x1,y1 ]     [ (1,2) x2,y2 ]     [ (1,3) x3,y3 ]     [ (1,4) x4,y4 ]     [ (1,5) x5,y5 ]     [ (1,6) x6,y6 ]     [ (1,7) x7,y7 ]     [ (1,8) x8,y8 ]     [ (1,9) x9,y9 ]     [ (1,10) x10,y10 ]



In [None]:
"""Plotting the Data-Space Tranformation"""
# Disabled copy of the plotting code that also sits at the end of the training
# cell: it evaluates the input and every entry of `hiddens` on new_feat and
# hands the results to morphs(). Uncomment to re-plot without retraining.
#activationsplot = []
#activationsplot.append(sess.run(x, feed_dict={x:new_feat}))
#for i in hiddens:
#    activationsplot.append(sess.run(i, feed_dict={x:new_feat}))
#morphs(activationsplot, np.argmax(new_labels, axis=1), skip=1, last_layer=True)

In [None]:
"""Optionally save the model"""
# Disabled: uncomment both lines to save the session's variables under ./model/.
#saver = tf.train.Saver()
#saver.save(sess, './model/')

In [6]:
"""Quiver plots of data space"""
# Draw only 100 points (with replacement) so the quiver plots stay uncluttered.
plot_feat, plot_labels = resample(feat, labels, n_samples=100, replace=True)

# Regular grid over [-10, 10) x [-10, 10) with step 0.5, one (a, b) pair per
# row, with the first coordinate varying slowest.
grid_axis = np.arange(-10, 10, 0.5)
mesh = np.stack(np.meshgrid(grid_axis, grid_axis, indexing='ij'), axis=-1).reshape(-1, 2)
print(mesh.shape)
def morphs_nolab3(activations2, skip=2, last_layer=False):
    """Draw quiver plots of how the points move from each layer to the next.

    One subplot is produced for every consecutive pair of entries in
    ``activations2`` on a grid that is 10 subplots wide. Arrows are anchored
    at the first two columns of the later entry and their components are the
    column-wise difference to the earlier entry.

    Args:
        activations2: sequence of 2-D arrays with at least two columns each.
            Consecutive entries are subtracted element-wise, so they must
            have matching shapes.
        skip: not used by this function; kept so existing calls still work.
        last_layer: not used by this function; kept so existing calls still work.

    Returns:
        None. The figure is handed to plotly's offline ``plot``.

    Note:
        ``tools`` (presumably ``plotly.tools``) is not imported explicitly in
        the visible cells and is expected to be in scope already.
    """
    n_cols = 10
    n_layers = len(activations2)
    # Ceil-divide to get the row count. The original referred to an undefined
    # name `activations` here, which raised NameError for 10 or more entries.
    n_rows = 1 if n_layers < n_cols else -(-n_layers // n_cols)
    fig3 = tools.make_subplots(rows=n_rows, cols=n_cols)
    for i in range(1, n_layers):
        before, after = activations2[i-1], activations2[i]
        temp_fig = ff.create_quiver(after[:,0], after[:,1], after[:,0]-before[:,0], after[:,1]-before[:,1], line=dict(width=1.0))
        # Subplot i goes to row ceil(i / 10), column ((i - 1) mod 10) + 1.
        row = -(-i // n_cols)
        col = n_cols if i % n_cols == 0 else i % n_cols
        fig3.append_trace(temp_fig['data'][0], row, col)
    fig3['layout'].update(height=n_rows*400)
    fig3['layout'].update(width=3000)
    plot(fig3)

# Evaluate every layer on the small sample, then draw the layer-to-layer quiver plots.
meshVelocities = [sess.run(layer, feed_dict={x: plot_feat}) for layer in hiddens]
morphs_nolab3(meshVelocities, skip=1, last_layer=True)

(1600, 2)
This is the format of your plot grid:
[ (1,1) x1,y1 ]     [ (1,2) x2,y2 ]     [ (1,3) x3,y3 ]     [ (1,4) x4,y4 ]     [ (1,5) x5,y5 ]     [ (1,6) x6,y6 ]     [ (1,7) x7,y7 ]     [ (1,8) x8,y8 ]     [ (1,9) x9,y9 ]     [ (1,10) x10,y10 ]



In [7]:
"""Heat Map of parameter space"""
# Current values of all trainable variables, in the order TensorFlow lists them.
params = sess.run(tf.trainable_variables())
# Keep every second entry starting with the first.
# NOTE(review): this presumes the variables alternate weight, bias, weight, ... - confirm.
only_weights = list(params[0::2])
def morphs_heat(activations2):
    """Draw one heat map per matrix in ``activations2`` on a 10-column grid.

    Args:
        activations2: sequence of 2-D arrays; in this notebook, the layer
            weight matrices.

    Returns:
        None. The figure is handed to plotly's offline ``plot``.

    Note:
        ``tools`` (presumably ``plotly.tools``) is not imported explicitly in
        the visible cells and is expected to be in scope already.
    """
    n_cols = 10
    n_plots = len(activations2)
    # Ceil-divide to get the row count. The original referred to an undefined
    # name `activations` here, which raised NameError for 10 or more entries.
    n_rows = 1 if n_plots < n_cols else -(-n_plots // n_cols)
    fig3 = tools.make_subplots(rows=n_rows, cols=n_cols)
    # Subplot positions are 1-based. Numbering the matrices from 1 keeps the
    # first matrix in the figure; a loop starting at index 1 would skip it.
    for position, matrix in enumerate(activations2, start=1):
        row = -(-position // n_cols)
        col = n_cols if position % n_cols == 0 else position % n_cols
        fig3.append_trace(go.Heatmap(z=matrix), row, col)
    fig3['layout'].update(height=n_rows*400)
    fig3['layout'].update(width=3000)
    plot(fig3)
# Render the heat maps for the matrices collected in only_weights.
morphs_heat(only_weights)

This is the format of your plot grid:
[ (1,1) x1,y1 ]     [ (1,2) x2,y2 ]     [ (1,3) x3,y3 ]     [ (1,4) x4,y4 ]     [ (1,5) x5,y5 ]     [ (1,6) x6,y6 ]     [ (1,7) x7,y7 ]     [ (1,8) x8,y8 ]     [ (1,9) x9,y9 ]     [ (1,10) x10,y10 ]



In [34]:
# The visible imports only pull `to_categorical` out of keras.utils, so the bare
# name `keras` is not bound by them. Import the package here so this cell also
# works on a fresh kernel.
import keras

# Hand the already-open TensorFlow session to the Keras backend.
keras.backend.set_session(sess)

TypeError: Input tensors to a Model must be Keras tensors. Found: Tensor("Tanh:0", shape=(?, 2), dtype=float32) (missing Keras metadata).

In [86]:
# A second two-moons sample, separate from the `feat` / `labels` used for training.
feat2, labels2 = datasets.make_moons(n_samples=n_samples, noise=0.05)
# Alternative (disabled): feat, labels = datasets.make_blobs(n_samples=n_samples, centers=2)
# Alternative (disabled): new_feat, new_labels = resample(feat, labels, n_samples=10000, replace=True)

# Keep only the class-1 rows and their labels.
# NOTE(review): this rebinds `onefeat` / `onelabel`, which the data cell also defines.
class_one_mask = labels2 == 1
onefeat = feat2[class_one_mask]
onelabel = labels2[class_one_mask]
print(feat2.shape)
print(onefeat.shape, onelabel.shape)

(2000, 2)
(1000, 2) (1000,)


In [90]:
# One-hot encode the class-1 labels over two classes.
new_onelabelsom = to_categorical(onelabel, num_classes=2)
# Bare expression so that the notebook displays the encoded array.
new_onelabelsom

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]], dtype=float32)

In [6]:
# NOTE(review): `feat_small` is not defined in any cell visible in this notebook,
# so this line only works with leftover kernel state - define it or drop the cell.
feat_small.shape

(2000,)