In [145]:
#import graph, coarsening, utils
%load_ext autoreload
%autoreload 1
%aimport graph
%aimport coarsening
%aimport utils

import tensorflow as tf
import time, shutil
import numpy as np
import os, collections, sklearn
import scipy.sparse as sp
import matplotlib.pyplot as plt
import networkx as nx
import scipy.sparse
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
#Definition of some flags useful later in the code

flags = tf.app.flags
FLAGS = flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')
# Graphs.
flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')
flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).')
flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')
flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.')

# Directories.
flags.DEFINE_string('dir_data', 'data_mnist', 'Directory to store data.')

In [239]:
def get_U(dist2,idx,nodes_coordinates):
    dist2 = get_dist(nodes_coordinates,idx)
    U_x = dist2[:,0]
    U_y = dist2[:,1]
    M = idx.shape[0]
    I = np.arange(0, M).repeat(8)
    Ax = sp.coo_matrix((U_x,(I,idx.flatten())),shape=(M,M))
    Ay = sp.coo_matrix((U_y,(I,idx.flatten())),shape=(M,M))
    A2 = [Ax,Ay]
    return A2

In [240]:
#Here we proceed at computing the original grid where the images live and the various coarsening that are applied
#for each level

def grid_graph(m,raw=False):
    z = graph.grid(m)  # normalized nodes coordinates
    dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric) 
    #dist contains the distance of the 8 nearest neighbors for each node indicated in z sorted in ascending order
    #idx contains the indexes of the 8 nearest for each node sorted in ascending order by distance
    A = graph.adjacency(dist, idx) 
    return A, z,idx,dist

def get_dist(frm,to):
    dist =[]
    for i,from_node in enumerate(frm):
       neighbor_idx =  to[i]
       neighbors = frm[neighbor_idx]
       for n_idx,n in zip(neighbor_idx,neighbors):
           diff = from_node-n
           dist.append(diff)
    #             dist[tuple(from_node.tolist()+[n_idx])] = diff
    dist = np.vstack(dist)
    return dist

def get_U(dist2,idx,nodes_coordinates):
    dist2 = get_dist(nodes_coordinates,idx)
    U_x = dist2[:,0]
    U_y = dist2[:,1]
    M = idx.shape[0]
    I = np.arange(0, M).repeat(8)
    Ax = sp.coo_matrix((U_x,(I,idx.flatten())),shape=(M,M))
    Ay = sp.coo_matrix((U_y,(I,idx.flatten())),shape=(M,M))
    A2 = [Ax,Ay]
    return A2

def coarsen(A, levels):
    graphs, parents = coarsening.metis(A, levels) #Coarsen a graph multiple times using Graclus variation of the METIS algorithm. 
                                                  #Basically, we randomly sort the nodes, we iterate on them and we decided to group each node
                                                  #with the neighbor having highest w_ij * 1/(\sum_k w_ik) + w_ij * 1/(\sum_k w_kj) 
                                                  #i.e. highest sum of probabilities to randomly walk from i to j and from j to i.
                                                  #We thus favour strong connections (i.e. the ones with high weight wrt all the others for both nodes) 
                                                  #in the choice of the neighbor of each node.
                    
                                                  #Construction is done a priori, so we have one graph for all the samples!
                    
                                                  #graphs = list of spare adjacency matrices (it contains in position 
                                                  #          0 the original graph)
                                                  #parents = list of numpy arrays (every array in position i contains 
                                                  #           the mapping from graph i to graph i+1, i.e. the idx of
                                                  #           node i in the coarsed graph -> that is, the idx of its cluster) 
    perms = coarsening.compute_perm(parents) #Return a list of indices to reorder the adjacency and data matrices so
                                             #that two consecutive nodes correspond to neighbors that should be collapsed
                                             #to produce the coarsed version of the graph.
                                             #Fake nodes are appended for each node which is not grouped with anybody else
    laplacians = []
    
    U = []
    for i,A in enumerate(graphs):
        print(i)
        M, M = A.shape
        graph
        A, nodes_coordinates,idx,dist = grid_graph(int(M**.5))
#         dist2 = get_dist(nodes_coordinates,idx)
        A2 = get_U(dist,idx,nodes_coordinates)
#         new_A.append(coarsening.perm_adjacency(A_raw,perms[i]))
        
        
        
        # We remove self-connections created by metis.
        A = A.tocoo()
        A.setdiag(0)

        if i < levels: #if we have to pool the graph 
            print(i)
            A = coarsening.perm_adjacency(A, perms[i]) #matrix A is here extended with the fakes nodes
                                                       #in order to do an efficient pooling operation
                                                       #in tensorflow as it was a 1D pooling
#             new_A.append(coarsening.perm_adjacency(A_raw,perms[i]))
            Ux = coarsening.perm_adjacency(A2[0],perms[i])
            Uy = coarsening.perm_adjacency(A2[1],perms[i])
            U.append([Ux,Uy])
        A = A.tocsr()
        A.eliminate_zeros()
        Mnew, Mnew = A.shape
        print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2))

        L = graph.laplacian(A, normalized=FLAGS.normalized_laplacian)
        laplacians.append(A)

    return laplacians, perms[0] if len(perms) > 0 else None, perms, U

t_start = time.time()

np.random.seed(0)
n_rows_cols = 28
A, nodes_coordinates,idx,dist = grid_graph(n_rows_cols,True)
dist2 = get_dist(nodes_coordinates,idx)
A2 = get_U(dist2,idx)
_, perm,perms,U = coarsen(A,FLAGS.coarsening_levels)

print('Execution time: {:.2f}s'.format(time.time() - t_start))

# graph.plot_spectrum(L)  # plateau is due to all the disconnected fake nodes 
# !!!! the plateau is in correspondence of 1 and not 0 because we are using a variation of the normalized laplacian which admits a degree different from 0 even in 
# correspondence of disconnected nodes (normalized laplacian D^-0.5 * L * D^-0.5 = I - D^-0.5 * A * D^-0.5 doesn't exist for D_i==0!).
# With normalized_laplacian=False the plateau is indeed in correspondence of 0 (as it should) !!!!

TypeError: get_U() missing 1 required positional argument: 'nodes_coordinates'

In [229]:
def get_dist(frm,to):
#     dist = {}
   dist =[]
   for i,from_node in enumerate(frm):
       neighbor_idx =  to[i]
       neighbors = frm[neighbor_idx]
       for n_idx,n in zip(neighbor_idx,neighbors):
           diff = from_node-n
           dist.append(diff)
#             dist[tuple(from_node.tolist()+[n_idx])] = diff
   dist = np.vstack(dist)
   return dist

def get_U(dist2,idx):
    dist2 = get_dist(nodes_coordinates,idx)
    U_x = dist2[:,0]
    U_y = dist2[:,1]
    M = idx.shape[0]
    I = np.arange(0, M).repeat(8)
    Ax = sp.coo_matrix((U_x,(I,idx.flatten())),shape=(M,M))
    Ay = sp.coo_matrix((U_y,(I,idx.flatten())),shape=(M,M))
    A2 = [Ax,Ay]
    return A2


dist2 = get_dist(nodes_coordinates,idx)
A2 = get_U(dist2,idx)
# U_x = dist2[:,0]
# U_y = dist2[:,1]
# M = idx.shape[0]
# I = np.arange(0, M).repeat(8)
# Ax = sp.coo_matrix((U_x,(I,idx.flatten())),shape=(M,M))
# Ay = sp.coo_matrix((U_y,(I,idx.flatten())),shape=(M,M))
# A2 = [Ax,Ay]

In [None]:
def get_U(dist2,idx):
    dist2 = get_dist(nodes_coordinates,idx)
    U_x = dist2[:,0]
    U_y = dist2[:,1]
    M = idx.shape[0]
    I = np.arange(0, M).repeat(8)
    Ax = sp.coo_matrix((U_x,(I,idx.flatten())),shape=(M,M))
    Ay = sp.coo_matrix((U_y,(I,idx.flatten())),shape=(M,M))
    A2 = [Ax,Ay]
    return A2

## dist2.shape

In [211]:
M = idx.shape[0]
I = np.arange(0, M).repeat(8)

In [224]:
Ax = sp.coo_matrix((U_x,(I,idx.flatten())),shape=(M,M))
Ay = sp.coo_matrix((U_y,(I,idx.flatten())),shape=(M,M))
A2 = [Ax,Ay]

In [7]:
#loading of MNIST dataset

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False)

train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32) #the first 5K samples of the training dataset 
                                                      #are used for validation
test_data = mnist.test.images.astype(np.float32)
train_labels = mnist.train.labels
val_labels = mnist.validation.labels
test_labels = mnist.test.labels

t_start = time.time()
train_data = coarsening.perm_data(train_data, perm)
val_data = coarsening.perm_data(val_data, perm)
test_data = coarsening.perm_data(test_data, perm)
print('Execution time: {:.2f}s'.format(time.time() - t_start))
del perm

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data_mnist/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data_mnist/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting data_mnist/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting data_mnist/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Execution time: 1.77s


In [8]:
class ChebNet:
    """
    The neural network model.
    """
    
    #Helper functions used for constructing the model
    def _weight_variable(self, shape,substr, regularization=True): 
        """Initializer for the weights"""
        
        initial = tf.truncated_normal_initializer(0, 0.1)
        var = tf.get_variable('weights'+substr, shape, tf.float32, initializer=initial)
        if regularization: #append the loss of the current variable to the regularization term 
            self.regularizers.append(tf.nn.l2_loss(var))
        return var
    
    def _bias_variable(self, shape, regularization=True):
        """Initializer for the bias"""
        
        initial = tf.constant_initializer(0.1)
        var = tf.get_variable('bias', shape, tf.float32, initializer=initial)
        if regularization:
            self.regularizers.append(tf.nn.l2_loss(var))
        return var
    

    def frobenius_norm(self, tensor): 
        """Computes the frobenius norm for a given tensor"""
        
        square_tensor = tf.square(tensor)
        tensor_sum = tf.reduce_sum(square_tensor)
        frobenius_norm = tf.sqrt(tensor_sum)
        return frobenius_norm
    
    
    def count_no_weights(self):
        total_parameters = 0
        for variable in tf.trainable_variables():
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        print('#weights in the model: %d' % (total_parameters,))

    def monetConv(self,x,U,Fout,K):
        N, M, Fin = x.get_shape()  # N is the number of images
                                   # M the number of vertices in the images
                                   # Fin the number of features
        N, M, Fin = int(N), int(M), int(Fin)
        gaussian_kernels =[]
        for j in range(K):
            W = self._weight_variable([Fin, Fout],substr="weight{}".format(j), regularization=False)
            mu = self._weight_variable([1],substr="mu{}".format(j), regularization=False)
            sigma = self._weight_variable([1],substr="sigma{}".format(j), regularization=False)
            d_u = tf.sparse.to_dense(U)
            zero = tf.constant(0, dtype=tf.float32)
            bools = tf.not_equal(d_u, zero)
            mask = tf.cast(bools,dtype=tf.float32)
            gaussian= tf.exp((-.5*tf.square((d_u-mu))/ (tf.square(sigma))))*mask
            #N, M, D -> M,N,D
            x_ = tf.transpose(x,[1,0,2])
            
            #M,N,D->
            x_r = tf.reshape(x,[x_.shape[0],-1])
            patch = tf.matmul(gaussian,x_r)
            patch = tf.reshape(patch,[M*N,Fin])
            kernel = tf.matmul(patch,W)
            kernel = tf.reshape(kernel,[M,N,Fout])
            gaussian_kernels.append(kernel)

        new_x = tf.add_n(gaussian_kernels)
        new_x = tf.transpose(new_x,[1,0,2])
        return new_x
    
    def monetConv(self,x,U,Fout,K):
        N, M, Fin = x.get_shape()  # N is the number of images
                                   # M the number of vertices in the images
                                   # Fin the number of features
        N, M, Fin = int(N), int(M), int(Fin)
    
    
        j = M
        W = self._weight_variable([Fin, Fout],substr="weight{}".format(j), regularization=False)
        mu = self._weight_variable([1,K],substr="mu{}".format(j), regularization=False)
        sigma = self._weight_variable([1,K],substr="sigma{}".format(j), regularization=False)
        d_u = tf.sparse.to_dense(U)
        d_u = tf.expand_dims(d_u,[2])
        zero = tf.constant(0, dtype=tf.float32)
        bools = tf.not_equal(d_u, zero)
        
        mask = tf.cast(bools,dtype=tf.float32)
        

        #M,M,1
        gaussian= tf.exp((-.5*tf.square((d_u-mu))/ (1e-14 + tf.square(sigma))))*mask
        #N, M, D -> M,N,D
        x_ = tf.transpose(x,[1,0,2])
        #M,N,D->
        x_r = tf.reshape(x,[x_.shape[0],-1])
        #M,N*D
        gaussian = tf.transpose(gaussian,[2,0,1])
        gaussian = tf.reshape(gaussian,[-1,M])
        patch = tf.matmul(gaussian,x_r)
        patch = tf.reshape(patch,[K*M*N,Fin])
        
        kernel = tf.matmul(patch,W)
        kernel = tf.reshape(kernel,[K,M,batch_size,Fout])

        new_x = tf.reduce_sum(kernel,0)
        new_x = tf.transpose(new_x,[1,0,2])
        return new_x
        
    #Modules used by the graph convolutional network
    def chebyshevConv(self, x, L, Fout, K): 
        """Applies chebyshev polynomials over the graph (i.e. it makes a spectral convolution)"""
        
        N, M, Fin = x.get_shape()  # N is the number of images
                                   # M the number of vertices in the images
                                   # Fin the number of features
        N, M, Fin = int(N), int(M), int(Fin)
        
        # Transform to Chebyshev basis. Here we apply the chebyshev polynomials. For simplicity instead of operating
        # over the eigenvalues of the laplacians we apply directly the chebysev polynomials to the laplacians.
        # This makes everything more efficient if the laplacians are actually represented as sparse matrices.
        x0 = x # L^0 * x = I * x
        x = tf.expand_dims(x0, 0)  # shape = 1 x N x M x Fin
        def concat(x, x_):
            x_ = tf.expand_dims(x_, 0)  # shape = 1 x M x Fin*N
            return tf.concat([x, x_], 0)  # shape = 1 x N x M x Fin
        if K > 1:
            x0 = tf.transpose(x0, [1,2,0])
            x0 = tf.reshape(x0, [M, Fin*N])
            x1 = tf.sparse_tensor_dense_matmul(L, x0) #shape = M x Fin*N <- L^1 * x
            x1 = tf.reshape(x1, [M, Fin, N]) # shape = M x Fin x N <- L^1 * x
            x1 = tf.transpose(x1, [2,0,1]) # shape = N x M x Fin
            x0 = tf.reshape(x0, [M, Fin, N])
            x0 = tf.transpose(x0, [2,0,1]) # shape = N x M x Fin
            x = concat(x, x1) # shape = 2 x N x M x Fin
        for k in range(2, K):
            x1 = tf.transpose(x1, [1,2,0])
            x1 = tf.reshape(x1, [M, Fin*N])
            x2 = tf.sparse_tensor_dense_matmul(L, x1) # shape = M x Fin*N <- L^k * x
            x2 = tf.reshape(x2, [M, Fin, N]) # shape = M x Fin x N <- L^1 * x
            x2 = tf.transpose(x2, [2,0,1])
            x1 = tf.reshape(x1, [M, Fin, N])
            x1 = tf.transpose(x1, [2,0,1]) # shape = N x M x Fin
            x2 = 2 * x2 - x0  # shape = N x M x Fin <- recursive definition of chebyshev polynomials
            x = concat(x, x2) # shape = K x N x M x Fin
            x0, x1 = x1, x2
        x = tf.transpose(x, [1,2,3,0]) # shape = N x M x Fin x K
        x = tf.reshape(x, [N*M, Fin*K])  # shape = N*M x Fin*K
        
        # Filter: Fout filters of order K applied over all the Fin features
        W = self._weight_variable([Fin*K, Fout], regularization=False)
        x = tf.matmul(x, W)  # N*M x Fout
        return tf.reshape(x, [N, M, Fout])  # N x M x Fout

    def b1relu(self, x):
        """Applies bias and ReLU. One bias per filter."""
        N, M, F = x.get_shape()
        b = self._bias_variable([1, 1, int(F)], regularization=False)
        return tf.nn.relu(x + b) #add the bias to the convolutive layer


    def mpool1(self, x, p):
        """Max pooling of size p. Should be a power of 2 (this is possible thanks to the reordering we previously did)."""
        if p > 1:
            x = tf.expand_dims(x, 3)  # shape = N x M x F x 1
            x = tf.nn.max_pool(x, ksize=[1,p,1,1], strides=[1,p,1,1], padding='SAME')
            return tf.squeeze(x, [3])  # shape = N x M/p x F
        else:
            return x

    def fc(self, x, Mout, relu=True):
        """Fully connected layer with Mout features."""
        N, Min = x.get_shape()
        W = self._weight_variable([int(Min), Mout],"", regularization=True)
        b = self._bias_variable([Mout], regularization=True)
        x = tf.matmul(x, W) + b
        return tf.nn.relu(x) if relu else x
    
    #function used for extracting the result of our model
    def _inference(self, x, dropout): #definition of the model
        
        # Graph convolutional layers.
        x = tf.expand_dims(x, 2)  # N x M x F=1
        for i in range(len(self.p)):
            with tf.variable_scope('cgconv{}'.format(i+1)):
                with tf.name_scope('filter'):
                    x = self.monetConv(x,self.L[i*2],self.F[i],self.K[i])
#                     x = self.chebyshevConv(x, self.L[i*2], self.F[i], self.K[i])
                with tf.name_scope('bias_relu'):
                    x = self.b1relu(x)
                with tf.name_scope('pooling'):
                    x = self.mpool1(x, self.p[i])
         
        # Fully connected hidden layers.
        N, M, F = x.get_shape()
        x = tf.reshape(x, [int(N), int(M*F)])  # N x M
        for i,M in enumerate(self.M[:-1]): #apply a fully connected layer for each layer defined in M
                                           #(we discard the last value in M since it contains the number of classes we have
                                           #to predict)
            with tf.variable_scope('fc{}'.format(i+1)):
                x = self.fc(x, M)
                x = tf.nn.dropout(x, dropout)
        
        # Logits linear layer, i.e. softmax without normalization.
        with tf.variable_scope('logits'):
            x = self.fc(x, self.M[-1], relu=False)
        return x
    
    def convert_coo_to_sparse_tensor(self, L):
        indices = np.column_stack((L.row, L.col))
        L = tf.SparseTensor(indices, L.data.astype('float32'), L.shape)
        L = tf.sparse_reorder(L)
        return L
        
    
    def __init__(self, p, K, F, M, M_0, batch_size, L,
                 decay_steps, decay_rate, learning_rate=1e-4, momentum=0.9, regularization=5e-4, 
                 idx_gpu = '/gpu:0'):
        self.regularizers = list() #list of regularization l2 loss for multiple variables
        
        self.p = p #dimensions of the pooling layers
        self.K = K #List of polynomial orders, i.e. filter sizes or number of hops
        self.F = F #Number of features of convolutional layers
        
        self.M = M #Number of neurons in fully connected layers
        
        self.M_0 = M_0 #number of elements in the first graph 
        
        self.batch_size = batch_size
        
        #definition of some learning parameters
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.learning_rate = learning_rate
        self.regularization = regularization
        
        with tf.Graph().as_default() as g:
                self.graph = g
                tf.set_random_seed(0)
                with tf.device(idx_gpu):
                        #definition of placeholders
                        self.L = [self.convert_coo_to_sparse_tensor(c_L.tocoo()) for c_L in L]
                        self.ph_data = tf.placeholder(tf.float32, (self.batch_size, M_0), 'data')
                        self.ph_labels = tf.placeholder(tf.int32, (self.batch_size), 'labels')
                        self.ph_dropout = tf.placeholder(tf.float32, (), 'dropout')
                    
                        #Model construction
                        self.logits = self._inference(self.ph_data, self.ph_dropout)
                        
                        #Definition of the loss function
                        with tf.name_scope('loss'):
                            self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, 
                                                                                                labels=self.ph_labels)
                            self.cross_entropy = tf.reduce_mean(self.cross_entropy)
                        with tf.name_scope('regularization'):
                            self.regularization *= tf.add_n(self.regularizers)
                        self.loss = self.cross_entropy + self.regularization
                        
                        #Solver Definition
                        with tf.name_scope('training'):
                            # Learning rate.
                            global_step = tf.Variable(0, name='global_step', trainable=False) #used for counting how many iterations we have done
                            if decay_rate != 1: #applies an exponential decay of the lr wrt the number of iterations done
                                learning_rate = tf.train.exponential_decay(
                                        learning_rate, global_step, decay_steps, decay_rate, staircase=True)
                            # Optimizer.
                            if momentum == 0:
                                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
                            else: #applies momentum for increasing the robustness of the gradient 
                                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
                            grads = optimizer.compute_gradients(self.loss)
                            self.op_gradients = optimizer.apply_gradients(grads, global_step=global_step) 
                            
                        #Computation of the norm gradients (useful for debugging)
                        self.var_grad = tf.gradients(self.loss, tf.trainable_variables())
                        self.norm_grad = self.frobenius_norm(tf.concat([tf.reshape(g, [-1]) for g in self.var_grad], 0))

                        #Extraction of the predictions and computation of accuracy
                        self.predictions = tf.cast(tf.argmax(self.logits, dimension=1), tf.int32)
                        self.accuracy = 100 * tf.contrib.metrics.accuracy(self.predictions, self.ph_labels)
        
                        # Create a session for running Ops on the Graph.
                        config = tf.ConfigProto(allow_soft_placement = True)
                        config.gpu_options.allow_growth = True
                        self.session = tf.Session(config=config)

                        # Run the Op to initialize the variables.
                        init = tf.global_variables_initializer()
                        self.session.run(init)
                        
                        self.count_no_weights()

In [9]:
#Convolutional parameters
p = [4, 4]   #Dimensions of the pooling layers
K = [25, 25] #List of polynomial orders, i.e. filter sizes or number of hops
F = [32, 64] #Number of features of convolutional layers

#FC parameters
C = max(mnist.train.labels) + 1 #Number of classes we have
M = [512, C] #Number of neurons in fully connected layers

#Solver parameters
batch_size = 100
decay_steps = mnist.train.num_examples / batch_size #number of steps to do before decreasing the learning rate
decay_rate = 0.95 #how much decreasing the learning rate
learning_rate = 0.02
# learning_rate = 0.001
momentum = 0.9
regularization = 5e-4

#Definition of keep probabilities for dropout layers
dropout_training = 0.5
dropout_val_test = 1.0

In [10]:
#Construction of the learning obj
M_0 = A2[0].shape[0] #number of elements in the first graph
learning_obj = ChebNet(p, K, F, M, M_0, batch_size, A2,
                       decay_steps, decay_rate,
                       learning_rate=learning_rate, regularization=regularization,
                       momentum=momentum)

#definition of overall number of training iterations and validation frequency
num_iter_val = 100
num_total_iter_training = 200000

num_iter = 0

list_training_loss = list()
list_training_norm_grad = list()
list_val_accuracy = list()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use the `axis` argument instead
#weights in the model: 2056686


In [18]:
def convert_coo_to_sparse_tensor( L):
    indices = np.column_stack((L.row, L.col))
    L = tf.SparseTensor(indices, L.data.astype('float32'), L.shape)
    L = tf.sparse_reorder(L)
    return L

tf.reset_default_graph()

sess = tf.Session()
init =tf.global_variables_initializer()
F = [32, 64]
with tf.Session() as sess:
    K=J=25
    F_in = 1
    F_out = F[0]
    batch_size =2
    M= A2[0].shape[0]
    x = tf.ones((batch_size,M,1))*2
    u = convert_coo_to_sparse_tensor(A2[0].tocoo())


    j=1
    w = tf.get_variable('w{}'.format(j),shape=(F_in,F_out), dtype=tf.float32)
    mu = tf.get_variable('mu{}'.format(j),shape=(1,J), dtype=tf.float32)
    sigma = tf.get_variable('sigma{}'.format(j),shape=(1,J), dtype=tf.float32)
    init = tf.global_variables_initializer()
    sess.run(init)

    u = convert_coo_to_sparse_tensor(A2[0].tocoo())
#         print(u.values.shape)
#         s_mu = tf.broadcast_to(mu,u.values.shape)
#         s_sigma = tf.broadcast_to(sigma,u.values.shape)

#         a = u*u
    d_u = tf.sparse.to_dense(u)
    d_u = tf.expand_dims(d_u,[2])
    zero = tf.constant(0, dtype=tf.float32)
    bools = tf.not_equal(d_u, zero)
    mask = tf.cast(bools,dtype=tf.float32)

    gaussian= tf.exp((-.5*tf.square((d_u-mu))/ (1e-14 + tf.square(sigma))))*mask
#         vals_ = tf.boolean_mask(gaussian,mask=bools)
#         sum_gaussian = tf.reduce_sum(gaussian,axis=1)
#         print(patch.shape)
#         s_gaussian = tf.sparse.SparseTensor(values=vals_,indices=u.indices,dense_shape=gaussian.shape)
#         temp = tf.sparse.matmul(s_gaussian,x_)
    # Nodes,batch_size,n_dim
    x_ = tf.transpose(x,[1,0,2])
    #remove F_in
    x_r = tf.reshape(x,[x_.shape[0],-1])
    gaussian = tf.transpose(gaussian,[2,0,1])
    gaussian = tf.reshape(gaussian,[-1,M])
    patch = tf.matmul(gaussian,x_r)
    
    patch = tf.reshape(patch,[K*M*batch_size,F_in])
    
# #         tf.reshape()
# #         tf.reshape(patch,[])
# #         gaussian_kernels.append(patch)
    kernel = tf.matmul(patch,w)
    kernel = tf.reshape(kernel,[K,M,batch_size,F_out])
    new_x = tf.reduce_sum(kernel,0)
    print(new_x)
#     kernel = tf.reshape(kernel,[M,batch_size,F_out])
#     gaussian_kernels.append(kernel)
        
# #     new_x = tf.add_n(gaussian_kernels)
# #     print(new_x.shape)
    new_x = tf.transpose(new_x,[1,0,2])
    print(new_x)
#     print(new_x.shape)
#     tf.reshape([])
# #     print(gaussian_kernels[0])
#     print(new_x.eval().shape)
#         print(patch.shape,gaussian.shape,x_r.shape
#         tf.reduce
#         x_ = tf.transpose(x,[1,2])
#         temp = tf.matmul(gaussian,x)
#         gaussian_kernels.append(gaussian)
#     tf.reduce_sum(gaussian,0)


Tensor("Sum:0", shape=(1024, 2, 32), dtype=float32)
Tensor("transpose_2:0", shape=(2, 1024, 32), dtype=float32)


In [7]:
tf.reset_default_graph()

sess = tf.Session()
init =tf.global_variables_initializer()
F = [32, 64]
with tf.Session() as sess:
    J=2
    F_in = 1
    F_out = F[0]
    batch_size =2
    M= A2[0].shape[0]
    x = tf.ones((batch_size,M,1))*2
    u = convert_coo_to_sparse_tensor(A2[0].tocoo())

    gaussian_kernels =[]
    for j in range(J):
        w = tf.get_variable('w{}'.format(j),shape=(F_in,F_out), dtype=tf.float32)
        mu = tf.get_variable('mu{}'.format(j),shape=(1), dtype=tf.float32)
        sigma = tf.get_variable('sigma{}'.format(j),shape=(1), dtype=tf.float32)
        init = tf.global_variables_initializer()
        sess.run(init)
        
        u = convert_coo_to_sparse_tensor(A2[0].tocoo())
#         print(u.values.shape)
#         s_mu = tf.broadcast_to(mu,u.values.shape)
#         s_sigma = tf.broadcast_to(sigma,u.values.shape)

#         a = u*u
        d_u = tf.sparse.to_dense(u)
        zero = tf.constant(0, dtype=tf.float32)
        bools = tf.not_equal(d_u, zero)
        mask = tf.cast(bools,dtype=tf.float32)
        
        gaussian= tf.exp((-.5*tf.square((d_u-mu))/ (1e-14 + tf.square(sigma))))*mask
#         vals_ = tf.boolean_mask(gaussian,mask=bools)
#         sum_gaussian = tf.reduce_sum(gaussian,axis=1)
#         print(patch.shape)
#         s_gaussian = tf.sparse.SparseTensor(values=vals_,indices=u.indices,dense_shape=gaussian.shape)
#         temp = tf.sparse.matmul(s_gaussian,x_)
        # Nodes,batch_size,n_dim
        x_ = tf.transpose(x,[1,0,2])
        #remove F_in
        x_r = tf.reshape(x,[x_.shape[0],-1])
        patch = tf.matmul(gaussian,x_r)
        patch = tf.reshape(patch,[M*batch_size,F_in])
    
#         tf.reshape()
#         tf.reshape(patch,[])
#         gaussian_kernels.append(patch)
        kernel = tf.matmul(patch,w)
        kernel = tf.reshape(kernel,[M,batch_size,F_out])
        gaussian_kernels.append(kernel)
        
    new_x = tf.add_n(gaussian_kernels)
#     print(new_x.shape)
    new_x = tf.transpose(new_x,[1,0,2])
    print(new_x.shape)
#     tf.reshape([])
# #     print(gaussian_kernels[0])
#     print(new_x.eval().shape)
#         print(patch.shape,gaussian.shape,x_r.shape
#         tf.reduce
#         x_ = tf.transpose(x,[1,2])
#         temp = tf.matmul(gaussian,x)
#         gaussian_kernels.append(gaussian)
#     tf.reduce_sum(gaussian,0)


Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
(2, 1024, 32)


In [8]:
tf.reset_default_graph()

sess = tf.Session()
init =tf.global_variables_initializer()
F = [32, 64]
with tf.Session() as sess:
    J=2
    F_in = 32
    F_out = F[1]
    batch_size =2
    M= A2[2].shape[0]
    x = tf.ones((batch_size,M,F_in))*2
    u = convert_coo_to_sparse_tensor(A2[0].tocoo())

    gaussian_kernels =[]
    for j in range(J):
        w = tf.get_variable('w{}'.format(j),shape=(F_in,F_out), dtype=tf.float32)
        mu = tf.get_variable('mu{}'.format(j),shape=(1), dtype=tf.float32)
        sigma = tf.get_variable('sigma{}'.format(j),shape=(1), dtype=tf.float32)
        init = tf.global_variables_initializer()
        sess.run(init)
        
        u = convert_coo_to_sparse_tensor(A2[2].tocoo())
#         print(u.values.shape)
#         s_mu = tf.broadcast_to(mu,u.values.shape)
#         s_sigma = tf.broadcast_to(sigma,u.values.shape)

#         a = u*u
        d_u = tf.sparse.to_dense(u)
        zero = tf.constant(0, dtype=tf.float32)
        bools = tf.not_equal(d_u, zero)
        mask = tf.cast(bools,dtype=tf.float32)
        
        gaussian= tf.exp((-.5*tf.square((d_u-mu))/ (1e-14 + tf.square(sigma))))*mask
#         vals_ = tf.boolean_mask(gaussian,mask=bools)
#         sum_gaussian = tf.reduce_sum(gaussian,axis=1)
#         print(patch.shape)
#         s_gaussian = tf.sparse.SparseTensor(values=vals_,indices=u.indices,dense_shape=gaussian.shape)
#         temp = tf.sparse.matmul(s_gaussian,x_)
        # Nodes,batch_size,n_dim
        
        x_ = tf.transpose(x,[1,0,2])
        #remove F_in
        x_r = tf.reshape(x,[x_.shape[0],-1])

        patch = tf.matmul(gaussian,x_r)
        
        patch = tf.reshape(patch,[M*batch_size,F_in])
    

# #         gaussian_kernels.append(patch)
        kernel = tf.matmul(patch,w)
        kernel = tf.reshape(kernel,[M,batch_size,F_out])
        print('gaussian {},x_{},x_r{},patch{},kernel_{}'.format(gaussian.shape,x_.shape,x_r.shape,patch.shape,kernel.shape))
        gaussian_kernels.append(kernel)
        
#     new_x = tf.add_n(gaussian_kernels)
# #     print(new_x.shape)
#     new_x = tf.transpose(new_x,[1,0,2])

NameError: name 'convert_coo_to_sparse_tensor' is not defined

In [None]:
#training and validation
indices = collections.deque() #queue that will contain a permutation of the training indexes
for k in range(num_iter, num_total_iter_training):
    
    #Construction of the training batch
    if len(indices) < batch_size: # Be sure to have used all the samples before using one a second time.
        indices.extend(np.random.permutation(train_data.shape[0])) #reinitialize the queue of indices
    idx = [indices.popleft() for i in range(batch_size)] #extract the current batch of samples
    
    #data extraction
    batch_data, batch_labels = train_data[idx,:], train_labels[idx] 
    
    feed_dict = {learning_obj.ph_data: batch_data, 
                 learning_obj.ph_labels: batch_labels, 
                 learning_obj.ph_dropout: dropout_training}
    
    #Training
    tic = time.time()
    _, current_training_loss, norm_grad = learning_obj.session.run([learning_obj.op_gradients, 
                                                                    learning_obj.loss, 
                                                                    learning_obj.norm_grad], feed_dict = feed_dict) 
    training_time = time.time() - tic
    
    list_training_loss.append(current_training_loss)
    list_training_norm_grad.append(norm_grad)
    
    if (np.mod(num_iter, num_iter_val)==0): #validation
        msg = "[TRN] iter = %03i, cost = %3.2e, |grad| = %.2e (%3.2es)" \
                    % (num_iter, list_training_loss[-1], list_training_norm_grad[-1], training_time)
        print(msg)
        
        #Validation Code
        tic = time.time()
        val_accuracy = 0
        for begin in range(0, val_data.shape[0], batch_size):
            end = begin + batch_size
            end = min([end, val_data.shape[0]])
            
            #data extraction
            batch_data = np.zeros((end-begin, val_data.shape[1]))
            batch_data = val_data[begin:end,:]
            batch_labels = np.zeros(batch_size)
            batch_labels[:end-begin] = val_labels[begin:end]
            
            feed_dict = {learning_obj.ph_data: batch_data, 
                         learning_obj.ph_labels: batch_labels,
                         learning_obj.ph_dropout: dropout_val_test}
            
            batch_accuracy = learning_obj.session.run(learning_obj.accuracy, feed_dict)
            val_accuracy += batch_accuracy*batch_data.shape[0]
        val_accuracy = val_accuracy/val_data.shape[0]
        val_time = time.time() - tic
        msg = "[VAL] iter = %03i, acc = %4.2f (%3.2es)" % (num_iter, val_accuracy, val_time)
        print(msg)
    num_iter += 1

[TRN] iter = 000, cost = 3.27e+01, |grad| = 1.32e+03 (4.30e+00s)
[VAL] iter = 000, acc = 8.68 (1.38e+01s)
[TRN] iter = 100, cost = 1.36e+04, |grad| = 3.70e+00 (1.02e+00s)
[VAL] iter = 100, acc = 11.26 (1.44e+01s)
[TRN] iter = 200, cost = 1.33e+04, |grad| = 3.66e+00 (1.18e+00s)
[VAL] iter = 200, acc = 11.26 (1.48e+01s)
[TRN] iter = 300, cost = 1.31e+04, |grad| = 3.62e+00 (1.03e+00s)
[VAL] iter = 300, acc = 9.86 (1.45e+01s)
[TRN] iter = 400, cost = 1.28e+04, |grad| = 3.58e+00 (1.07e+00s)
[VAL] iter = 400, acc = 11.26 (1.47e+01s)
[TRN] iter = 500, cost = 1.26e+04, |grad| = 3.55e+00 (1.05e+00s)
[VAL] iter = 500, acc = 11.26 (1.44e+01s)
[TRN] iter = 600, cost = 1.23e+04, |grad| = 3.51e+00 (1.05e+00s)
[VAL] iter = 600, acc = 11.26 (1.44e+01s)
[TRN] iter = 700, cost = 1.21e+04, |grad| = 3.48e+00 (1.02e+00s)
[VAL] iter = 700, acc = 11.26 (1.44e+01s)
[TRN] iter = 800, cost = 1.19e+04, |grad| = 3.45e+00 (1.58e+00s)
[VAL] iter = 800, acc = 11.26 (1.68e+01s)
[TRN] iter = 900, cost = 1.16e+04, |gra

In [21]:
#Test code
tic = time.time()
test_accuracy = 0
for begin in range(0, test_data.shape[0], batch_size):
    end = begin + batch_size
    end = min([end, test_data.shape[0]])
            
    batch_data = np.zeros((end-begin, test_data.shape[1]))
    batch_data = test_data[begin:end,:]
            
    feed_dict = {learning_obj.ph_data: batch_data, learning_obj.ph_dropout: 1}
            
    batch_labels = np.zeros(batch_size)
    batch_labels[:end-begin] = test_labels[begin:end]
    feed_dict[learning_obj.ph_labels] = batch_labels
            
    batch_accuracy = learning_obj.session.run(learning_obj.accuracy, feed_dict)
    test_accuracy += batch_accuracy*batch_data.shape[0]
test_accuracy = test_accuracy/test_data.shape[0]
test_time = time.time() - tic
msg = "[TST] iter = %03i, acc = %4.2f (%3.2es)" % (num_iter, test_accuracy, test_time)
print(msg)

[TST] iter = 21000, acc = 98.91 (2.71e+00s)
