# Residual Gated Graph ConvNets
### Xavier Bresson, Jan. 15 2018

In [1]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import pdb #pdb.set_trace()
import time
import numpy as np
import pickle


# Choose the GPU below
if 1==1:
    import os
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    gpu_id = 3  # select the GPU in 0,1,2,3
    os.environ["CUDA_VISIBLE_DEVICES"]=str(gpu_id) 
    # check with: nvidia-smi

if torch.cuda.is_available():
    print('cuda available')
    dtypeFloat = torch.cuda.FloatTensor
    dtypeLong = torch.cuda.LongTensor
    #torch.cuda.manual_seed(1)
else:
    print('cuda not available')
    dtypeFloat = torch.FloatTensor
    dtypeLong = torch.LongTensor
    #torch.manual_seed(1)
    
    
# import files in folder util
import sys
sys.path.insert(0, 'util/')
#%load_ext autoreload
#%autoreload 2
import block 
import graph_generator as g


from sklearn.metrics import confusion_matrix

cuda available


In [2]:
##############################
# Class cell definition
##############################
class OurConvNetcell(nn.Module):
    
    # NOTE: Each graph convnet cell uses *TWO* convolutional operations

    def __init__(self, dim_in, dim_out, flag_resnet, dpr):
        super(OurConvNetcell, self).__init__()

        # conv: x = ReLU ( Ui.x + sum_j sigma(Vi.x+Vj.x+bv) . Uj.x + bu )
        
        #dropout 
        self.drop = nn.Dropout(p=dpr)
        
        # conv1
        self.Ui1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Uj1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Vi1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Vj1 = nn.Linear(dim_in, dim_out, bias=False)  
        self.bu1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        # conv2
        self.Ui2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Uj2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Vi2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Vj2 = nn.Linear(dim_out, dim_out, bias=False)  
        self.bu2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        # RN
        if flag_resnet=='vanilla_resnet':
            # bn1, bn2
            self.bn1 = torch.nn.BatchNorm1d(dim_out)
            self.bn2 = torch.nn.BatchNorm1d(dim_out)
            # resnet
            self.R1a = nn.Linear(dim_out, dim_out, bias=False) 
            self.R1b = nn.Linear(dim_in, dim_out, bias=False) 
            self.R2a = nn.Linear(dim_out, dim_out, bias=False) 
            self.R2b = nn.Linear(dim_out, dim_out, bias=False)
            
        if flag_resnet=='he_resnet':
            # bn1, bn2
            self.bn1 = torch.nn.BatchNorm1d(dim_out, track_running_stats=False)
            self.bn2 = torch.nn.BatchNorm1d(dim_out, track_running_stats=False)
            # resnet
            self.R = nn.Linear(dim_in, dim_out, bias=False) 
            
        if flag_resnet=='no_resnet':
            # bn1, bn2
            self.bn1 = torch.nn.BatchNorm1d(dim_out)
            self.bn2 = torch.nn.BatchNorm1d(dim_out)
        
        # class variable
        self.flag_resnet = flag_resnet
        
        # init
        self.init_weights_OurConvNetcell(dim_in, dim_out, 1)
        
         
    def init_weights_OurConvNetcell(self, dim_in, dim_out, gain):
        
        # TRY NEW THINGS LATER !!!!!!!!!!!

        
        # conv1
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.Ui1.weight.data.uniform_(-scale, scale) 
        self.Uj1.weight.data.uniform_(-scale, scale) 
        self.Vi1.weight.data.uniform_(-scale, scale) 
        self.Vj1.weight.data.uniform_(-scale, scale) 
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.bu1.data.fill_(0)
        self.bv1.data.fill_(0)
        
        # conv2
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.Ui2.weight.data.uniform_(-scale, scale) 
        self.Uj2.weight.data.uniform_(-scale, scale) 
        self.Vi2.weight.data.uniform_(-scale, scale) 
        self.Vj2.weight.data.uniform_(-scale, scale) 
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.bu2.data.fill_(0)
        self.bv2.data.fill_(0)
        
        # RN
        if self.flag_resnet=='vanilla_resnet':
            scale = gain* np.sqrt( 2.0/ dim_out )
            self.R1a.weight.data.uniform_(-scale, scale)  
            scale = gain* np.sqrt( 2.0/ dim_in )
            self.R1b.weight.data.uniform_(-scale, scale)
            scale = gain* np.sqrt( 2.0/ dim_out )
            self.R2a.weight.data.uniform_(-scale, scale)  
            scale = gain* np.sqrt( 2.0/ dim_out )
            self.R2b.weight.data.uniform_(-scale, scale) 
            
        if self.flag_resnet=='he_resnet':
            scale = gain* np.sqrt( 2.0/ dim_in )
            self.R.weight.data.uniform_(-scale, scale)  
            
        
        

        
    def forward(self, x, E_start, E_end):
        
        # conv: x = ReLU ( Ui.x + sum_j sigma(Vi.x+Vj.x+bv) . Uj.x + bu )
        # E_start, E_end : E x V
           

        if self.flag_resnet=='vanilla_resnet':
            xin = x
            # conv1
            Vix = self.Vi1(x)  #  V x H_out
            Vjx = self.Vj1(x)  #  V x H_out
            x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv1  # E x H_out
            x1 = F.sigmoid(x1)
            Uix = self.Ui1(x)  #  V x H_out
            x2 = torch.mm(E_start, Uix)  #  V x H_out
            Ujx = self.Uj1(x)  #  V x H_out
            x = Uix + torch.mm(E_end.t(), x1*x2) + self.bu1 #  V x H_out
            # relu1
            x = F.relu(x)
            # addition
            x = self.R1a(x) + self.R1b(xin)
            # bn1
            x = self.bn1(x)
            xin = x
            # conv2
            Vix = self.Vi2(x)  #  V x H_out
            Vjx = self.Vj2(x)  #  V x H_out
            x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv2  # E x H_out
            x1 = F.sigmoid(x1)
            Uix = self.Ui2(x)  #  V x H_out
            x2 = torch.mm(E_start, Uix)  #  V x H_out
            Ujx = self.Uj2(x)  #  V x H_out
            x = Uix + torch.mm(E_end.t(), x1*x2) + self.bu2 #  V x H_out
            # relu2
            x = F.relu(x)
            # addition
            x = self.R2a(x) + self.R2b(xin)
            # bn1
            x = self.bn2(x)
            
            
        if self.flag_resnet=='he_resnet':
            
            x = self.drop(x)
            
            xin = x
            # conv1
            Vix = self.Vi1(x)  #  V x H_out
            Vjx = self.Vj1(x)  #  V x H_out
            x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv1  # E x H_out
            x1 = F.sigmoid(x1)
            Uix = self.Ui1(x)  #  V x H_out
            x2 = torch.mm(E_start, Uix)  #  V x H_out
            Ujx = self.Uj1(x)  #  V x H_out
            x = Uix + torch.mm(E_end.t(), x1*x2) + self.bu1 #  V x H_out
            # bn1
            x = self.bn1(x)
            # relu1
            x = F.relu(x)
            # conv2
            Vix = self.Vi2(x)  #  V x H_out
            Vjx = self.Vj2(x)  #  V x H_out
            x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv2  # E x H_out
            x1 = F.sigmoid(x1)
            Uix = self.Ui2(x)  #  V x H_out
            x2 = torch.mm(E_start, Uix)  #  V x H_out
            Ujx = self.Uj2(x)  #  V x H_out
            x = Uix + torch.mm(E_end.t(), x1*x2) + self.bu2 #  V x H_out
            # bn2
            x = self.bn2(x)
            # addition
            x = x + self.R(xin)
            # relu2
            x = F.relu(x)
            
            
        if self.flag_resnet=='no_resnet':
            # conv1
            
            #x = pytorch.dropout(x)
            
            Vix = self.Vi1(x)  #  V x H_out
            Vjx = self.Vj1(x)  #  V x H_out
            x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv1  # E x H_out
            x1 = F.sigmoid(x1)
            Uix = self.Ui1(x)  #  V x H_out
            x2 = torch.mm(E_start, Uix)  #  V x H_out
            Ujx = self.Uj1(x)  #  V x H_out
            x = Uix + torch.mm(E_end.t(), x1*x2) + self.bu1 #  V x H_out
            # relu1
            x = F.relu(x)
            # bn1
            x = self.bn1(x)
            # conv2
            Vix = self.Vi2(x)  #  V x H_out
            Vjx = self.Vj2(x)  #  V x H_out
            x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv2  # E x H_out
            x1 = F.sigmoid(x1)
            Uix = self.Ui2(x)  #  V x H_out
            x2 = torch.mm(E_start, Uix)  #  V x H_out
            Ujx = self.Uj2(x)  #  V x H_out
            x = Uix + torch.mm(E_end.t(), x1*x2) + self.bu2 #  V x H_out
            # relu2
            x = F.relu(x)
            # bn1
            x = self.bn2(x)
            
            
            
            
        return x
        
        
        
        
        
##############################
# Class NN definition
##############################  
class Graph_OurConvNet(nn.Module):
    
    # NOTE: Each graph convnet cell uses *TWO* convolutional operations
    
    def __init__(self, net_parameters):
        
        super(Graph_OurConvNet, self).__init__()
        
        # parameters
        flag_task = task_parameters['flag_task']
        Voc = net_parameters['Voc']
        D = net_parameters['D']
        nb_clusters_target = net_parameters['nb_clusters_target']
        flag_resnet = net_parameters['flag_resnet']
        H = net_parameters['H']
        L = net_parameters['L']
        dpr = net_parameters['dpr']
        
        # vector of hidden dimensions
        net_layers = []
        for layer in range(L):
            net_layers.append(H)

        # embedding
        self.encoder = nn.Embedding(Voc, D)      
        
        # CL cells
        # NOTE: Each graph convnet cell uses *TWO* convolutional operations
        net_layers_extended = [D] + net_layers # include embedding dim
        L = len(net_layers)
        list_of_gnn_cells = [] # list of NN cells
        list_of_bn_cells = [] # list of BN cells
        for layer in range(L//2):
            Hin, Hout = net_layers_extended[2*layer], net_layers_extended[2*layer+2]
            list_of_gnn_cells.append(OurConvNetcell(Hin,Hout,flag_resnet,dpr))
            
        # register the cells for pytorch
        self.gnn_cells = nn.ModuleList(list_of_gnn_cells)
              
        # fc
        Hfinal = net_layers_extended[-1]
        self.fc = nn.Linear(Hfinal,nb_clusters_target) 
        
        # init
        self.init_weights_Graph_OurConvNet(Voc,D,Hfinal,nb_clusters_target,1)
        
        # print
        print('\nnb of hidden layers=',L)
        print('dim of layers (w/ embed dim)=',net_layers_extended)      
        if flag_resnet=='he_resnet':
            print('He resnet active')
        if flag_resnet=='vanilla_resnet':
            print('Vanilla resnet active')
        if flag_resnet=='no_resnet':
            print('Resnet *not* active')
        print('\n')
        
        # class variables
        self.L = L
        self.net_layers_extended = net_layers_extended      
        self.flag_task = flag_task
        
        
    def init_weights_Graph_OurConvNet(self, Fin_enc, Fout_enc, Fin_fc, Fout_fc, gain):

        scale = gain* np.sqrt( 2.0/ Fin_enc )
        self.encoder.weight.data.uniform_(-scale, scale)  
        scale = gain* np.sqrt( 2.0/ Fin_fc )
        self.fc.weight.data.uniform_(-scale, scale)  
        self.fc.bias.data.fill_(0)  
    
            
    def forward(self, G):
        
        # signal
        x = G.signal  # V-dim
        x = Variable( torch.LongTensor(x).type(dtypeLong) , requires_grad=False)
           
        # encoder
        x_emb = self.encoder(x) # V x D
        
        # graph operators
        # Edge = start vertex to end vertex
        # E_start = E x V mapping matrix from edge index to corresponding start vertex
        # E_end = E x V mapping matrix from edge index to corresponding end vertex
        E_start = G.edge_to_starting_vertex
        E_end   = G.edge_to_ending_vertex
        #E_start = Variable( torch.FloatTensor(E_start).type(dtypeFloat) , requires_grad=False) 
        #E_end = Variable( torch.FloatTensor(E_end).type(dtypeFloat) , requires_grad=False) 
        E_start = torch.from_numpy(E_start.toarray()).type(dtypeFloat)
        E_end = torch.from_numpy(E_end.toarray()).type(dtypeFloat) 
        E_start = Variable( E_start , requires_grad=False) 
        E_end = Variable( E_end , requires_grad=False) 
        
        # convnet cells  
        x = x_emb
        for layer in range(self.L//2):
            gnn_layer = self.gnn_cells[layer]            
            x = gnn_layer(x,E_start,E_end) # V x Hfinal
            
        # FC
        x = self.fc(x) 
        
        x = torch.sum(x)
        
        return x
        
        
    def loss(self, y, y_target):
        
        loss = nn.MSELoss()(y,y_target) #changed for regression
        
        return loss
       
        
    def update(self, lr):
                
        #update = torch.optim.SGD( self.parameters(), lr=lr )
        #update = torch.optim.SGD( self.parameters(), lr=lr, momentum=0.9 )
        update = torch.optim.Adam( self.parameters(), lr=lr )
        
        return update
    
    
    def update_learning_rate(self, optimizer, lr):
   
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        return optimizer
    
    
    def nb_param(self):

        return self.nb_param
    
    
    
    
    
#### TEST
if 2==1:
    

    # Delete existing network if exists
    try:
        del net
        print('Delete existing network\n')
    except NameError:
        print('No existing network to delete\n')


    # subgraph matching
    if 1==1:
        task_parameters = {}
        task_parameters['flag_task'] = 'matching'
        task_parameters['nb_communities'] = 10
        task_parameters['nb_clusters_target'] = 2
        task_parameters['Voc'] = 3
        task_parameters['size_min'] = 15
        task_parameters['size_max'] = 25
        task_parameters['size_subgraph'] = 20
        task_parameters['p'] = 0.5
        task_parameters['q'] = 0.1
        task_parameters['W0'] = block.random_graph(task_parameters['size_subgraph'],task_parameters['p'])
        task_parameters['u0'] = np.random.randint(task_parameters['Voc'],size=task_parameters['size_subgraph'])


    # network parameters
    net_parameters = {}
    net_parameters['Voc'] = task_parameters['Voc']
    net_parameters['D'] = 150 # 50
    net_parameters['nb_clusters_target'] = task_parameters['nb_clusters_target']
    net_parameters['flag_resnet'] = 'he_resnet' # ResNet
    #net_parameters['flag_resnet'] = 'vanilla_resnet' # ResNet
    net_parameters['flag_resnet'] = 'no_resnet' # ResNet
    net_parameters['flag_bn'] = True     # batch normalization
    net_parameters['H'] = 50
    net_parameters['L'] = 4 # 2
    #print(net_parameters)

    # instantiate
    net = Graph_OurConvNet(net_parameters)
    if torch.cuda.is_available():
        net.cuda()
    print(net)

    # number of network parameters
    nb_param = 0
    for param in net.parameters():
        nb_param += np.prod(list(param.data.size()))
    print('nb_param=',nb_param)





    # optimization parameters
    opt_parameters = {}
    opt_parameters['learning_rate'] = 0.0075   # ADAM
    opt_parameters['max_iters'] = 5000   
    opt_parameters['max_iters'] = 101 ####################
    opt_parameters['batch_iters'] = 10 ####################   
    opt_parameters['decay_rate'] = 1.25   
    #print(opt_parameters)

    # task parameters
    flag_task = task_parameters['flag_task']
    # network parameters
    Voc = net_parameters['Voc']
    D = net_parameters['D']
    nb_clusters_target = net_parameters['nb_clusters_target']
    flag_resnet = net_parameters['flag_resnet']
    flag_bn = net_parameters['flag_bn']
    H = net_parameters['H']
    L = net_parameters['L']
    # optimization parame
    learning_rate = opt_parameters['learning_rate']
    max_iters = opt_parameters['max_iters']
    batch_iters = opt_parameters['batch_iters']
    decay_rate = opt_parameters['decay_rate']


    # Optimizer
    global_lr = learning_rate
    global_step = 0
    lr = learning_rate
    optimizer = net.update(lr) 


    # generate one train graph
    if flag_task=='matching': # subgraph matching
        train_x = g.variable_size_graph(task_parameters)
    elif flag_task=='clustering': # semi supervised clustering
        train_x = g.graph_semi_super_clu(task_parameters)
    train_y = train_x.target
    train_y = Variable( torch.LongTensor(train_y).type(dtypeLong) , requires_grad=False) 

    # reset time
    t_start = time.time()

    # forward, loss
    y = net.forward(train_x)


    # compute loss weigth
    labels = train_y.data.cpu().numpy()
    V = labels.shape[0]
    nb_classes = len(np.unique(labels)) 
    cluster_sizes = np.zeros(nb_classes)
    for r in range(nb_classes):
        cluster = np.where(labels==r)[0]
        cluster_sizes[r] = len(cluster)    
    weight = torch.zeros(nb_classes)
    for r in range(nb_classes):
        sumj = 0
        for j in range(nb_classes):
            if j!=r:
                sumj += cluster_sizes[j]
        weight[r] = sumj/ V 
    loss = net.loss(y,train_y,weight)


    # backward, update
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    
    
    

In [3]:

# subgraph matching
if 1==2:
    task_parameters = {}
    task_parameters['flag_task'] = 'matching'
    task_parameters['nb_communities'] = 10
    task_parameters['nb_clusters_target'] = 2
    task_parameters['Voc'] = 3
    task_parameters['size_min'] = 15
    task_parameters['size_max'] = 25
    task_parameters['size_subgraph'] = 20
    task_parameters['p'] = 0.5
    task_parameters['q'] = 0.1
    task_parameters['W0'] = block.random_graph(task_parameters['size_subgraph'],task_parameters['p'])
    task_parameters['u0'] = np.random.randint(task_parameters['Voc'],size=task_parameters['size_subgraph'])

    
# semi supervised clustering
if 1==2:
    task_parameters = {}
    task_parameters['flag_task'] = 'clustering'
    task_parameters['nb_communities'] = 10
    task_parameters['nb_clusters_target'] = task_parameters['nb_communities']
    task_parameters['Voc'] = task_parameters['nb_communities'] + 1
    task_parameters['size_min'] = 5
    task_parameters['size_max'] = 25
    task_parameters['p'] = 0.5
    task_parameters['q'] = 0.1  
    file_name = 'data/set_100_clustering_maps_p05_q01_size5_25_2017-10-31_10-25-00_.txt'
    with open(file_name, 'rb') as fp:
        all_trainx = pickle.load(fp)
    task_parameters['all_trainx'] = all_trainx[:100]
    
    
# predicting solubility
if 1==1:
    task_parameters = {}
    task_parameters['flag_task'] = 'predicting_sol'
    task_parameters['nb_clusters_target'] = 1
    task_parameters['Voc'] = 10 #no. of unique atoms in the data set
    task_parameters['atm_dict'] = {'Cl': 0, 'N': 1, 'O': 2, 'S': 3, 'P': 4, 'F': 5, 'Br': 6, 'I': 7, 'C': 8, 'H': 9}
    
    
#print(task_parameters)

In [4]:

#################
# network and optimization parameters
#################


# network parameters
net_parameters = {}
net_parameters['Voc'] = task_parameters['Voc']
net_parameters['D'] = 50
net_parameters['nb_clusters_target'] = task_parameters['nb_clusters_target']
net_parameters['flag_resnet'] = 'he_resnet' # ResNet
net_parameters['flag_bn'] = True     # batch normalization
net_parameters['H'] = 50
net_parameters['L'] = 2
net_parameters['dpr'] = 0.05
#print(net_parameters)

# optimization parameters
opt_parameters = {}
opt_parameters['learning_rate'] = 0.005    #ADAM
opt_parameters['max_iters'] = 5000   
opt_parameters['max_iters'] = 50 ####################
opt_parameters['batch_iters'] = 2 ####################   
opt_parameters['decay_rate'] = 1.25   
#print(opt_parameters)



######################
# reading total smiles and solubilities
######################

import csv
 
tot_smiles = []
tot_solubility = []

rownum = 0

rfile = open('delaney.csv', newline='')
csv_reader = csv.reader(rfile)
 
for row in csv_reader:
    if rownum != 0 :
        tot_smiles.append(row[9])
        tot_solubility.append(float(row[8]))
        
    rownum += 1
    
rfile.close()




    
#########################
# Graph convnet function
#########################
def our_graph_convnets(task_parameters,net_parameters,opt_parameters):


    # Delete existing network if exists
    try:
        del net
        print('Delete existing network\n')
    except NameError:
        print('No existing network to delete\n')


    # instantiate
    net = Graph_OurConvNet(net_parameters)
    if torch.cuda.is_available():
        net.cuda()
    print(net)
    

    # task parameters
    flag_task = task_parameters['flag_task']
    # network parameters
    Voc = net_parameters['Voc']
    D = net_parameters['D']
    nb_clusters_target = net_parameters['nb_clusters_target']
    flag_resnet = net_parameters['flag_resnet']
    flag_bn = net_parameters['flag_bn']
    H = net_parameters['H']
    L = net_parameters['L']
    # optimization parame
    learning_rate = opt_parameters['learning_rate']
    max_iters = opt_parameters['max_iters']
    batch_iters = opt_parameters['batch_iters']
    decay_rate = opt_parameters['decay_rate']
    
    
    # Optimizer
    global_lr = learning_rate
    global_step = 0
    lr = learning_rate
    optimizer = net.update(lr) 

    
    #############
    # loop over epochs
    #############
    t_start = time.time()
    t_start_total = time.time()
    average_loss_old = 1e10
    running_loss = 0.0
    running_total = 0
    running_conf_mat = 0
    running_accuracy = 0
    tab_results = []
    
    nb_training = 800    #number of training data points
    nb_iters = nb_training * max_iters
    
    net.train() # calling the training mode
    
    for iteration in range(nb_iters):  # loop over the dataset multiple times # changed to nb_iters

        # generate one train graph
        if flag_task=='matching': # subgraph matching
            train_x = g.variable_size_graph(task_parameters)
        elif flag_task=='clustering': # semi supervised clustering
            train_x = g.graph_semi_super_clu(task_parameters)
        elif flag_task=='predicting_sol': # solubility regression
            train_x = g.predict_solubility(task_parameters,tot_smiles[(iteration % (nb_training))],tot_solubility[(iteration % (nb_training))])
        
        train_y = train_x.target
        train_y = Variable( torch.FloatTensor(train_y).type(dtypeFloat) , requires_grad=False) 

        # reset time
        t_start = time.time()

        # forward, loss
        #rint(tot_smiles[(iteration % (nb_training - 1))])
        #rint(tot_solubility[(iteration % (nb_training - 1))])
        y = net.forward(train_x)
        # compute loss weigth
        #labels = train_y.data.cpu().numpy()
        #V = labels.shape[0]
        #nb_classes = len(np.unique(labels)) 
        #cluster_sizes = np.zeros(nb_classes)
        #for r in range(nb_classes):
            #cluster = np.where(labels==r)[0]
            #cluster_sizes[r] = len(cluster)    
        #weight = torch.zeros(nb_classes)
        #for r in range(nb_classes):
            #sumj = 0
            #for j in range(nb_classes):
                #if j!=r:
                    #sumj += cluster_sizes[j]
            #weight[r] = sumj/ V 
        loss = net.loss(y,train_y)
        loss_train = loss.data[0] #mean square error
        
        loss_train = loss_train**(0.5) #root mean squate error
        
        running_loss += loss_train
        running_total += 1

        # confusion matrix
        #S = train_y.data.cpu().numpy()
        #C = np.argmax( torch.nn.Softmax()(y).data.cpu().numpy() , axis=1)
        #CM = confusion_matrix(S,C).astype(np.float32)
        #nb_classes = CM.shape[0]
        #train_y = train_y.data.cpu().numpy()
        #for r in range(nb_classes):
            #cluster = np.where(train_y==r)[0]
            #CM[r,:] /= cluster.shape[0]
        #running_conf_mat += CM
        #running_accuracy += np.sum(np.diag(CM))/ nb_classes

        # backward, update
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # learning rate, print results
        if not (iteration)%(batch_iters*800):

            # time
            t_stop = time.time() - t_start
            t_start = time.time()

            # confusion matrix
            #average_conf_mat = running_conf_mat/ running_total
            #running_conf_mat = 0

            # accuracy
            #average_accuracy = running_accuracy/ running_total
            #running_accuracy = 0

            # update learning rate 
            average_loss = running_loss/ running_total
            if average_loss > 0.99* average_loss_old:
                lr /= decay_rate
            average_loss_old = average_loss
            optimizer = net.update_learning_rate(optimizer, lr)
            running_loss = 0.0
            running_total = 0

            # save intermediate results
            tab_results.append([iteration,average_loss,time.time()-t_start_total])

            # print results
            if 1==1:
                print('\niteration= %d, loss(1000iter)= %.3f, lr= %.5f, time(1000iter)= %.3f' % 
                      (iteration, average_loss, lr, t_stop))
                #print('Confusion matrix= \n', 100* average_conf_mat)
                #print('Accuracy= \n', 100* average_accuracy)

                
    print("")
    print("Predicted values \n")
    ############            
    # Evaluation on 20 pre-saved data
    ############
    running_loss = 0.0
    running_total = 0
    running_conf_mat = 0
    running_accuracy = 0
    tst_dps = 20    #test for 20 test data
    
    net.eval() # calling the evaluating/testing mode
    
    for iteration in range(tst_dps):
        
        # generate one data
        if flag_task == 'matching':
            train_x = g.variable_size_graph(task_parameters)
        if flag_task == 'clustering':
            train_x = task_parameters['all_trainx'][iteration][1]
        elif flag_task=='predicting_sol': # solubility regression
            train_x = g.predict_solubility(task_parameters,tot_smiles[(iteration + 820)],tot_solubility[(iteration +820)])
            
        train_y = train_x.target
        train_y = Variable( torch.FloatTensor(train_y).type(dtypeFloat) , requires_grad=False) 
        
        # forward, loss
        y = net.forward(train_x)
        
        
        print("Original = %.3f, Predicted = %.3f" % (train_y, y))
        
        # compute loss weigth
        #labels = train_y.data.cpu().numpy()
        #V = labels.shape[0]
        #nb_classes = len(np.unique(labels)) 
        #cluster_sizes = np.zeros(nb_classes)
        #for r in range(nb_classes):
            #cluster = np.where(labels==r)[0]
            #cluster_sizes[r] = len(cluster)    
        #weight = torch.zeros(nb_classes)
        #for r in range(nb_classes):
            #sumj = 0
            #for j in range(nb_classes):
                #if j!=r:
                    #sumj += cluster_sizes[j]
            #weight[r] = sumj/ V 
        loss = net.loss(y,train_y)
        loss_train = loss.data[0] #mse
        
        loss_train = loss_train**(0.5) #rmse
        
        running_loss += loss_train
        running_total += 1
        
        # confusion matrix
        #S = train_y.data.cpu().numpy()
        #C = np.argmax( torch.nn.Softmax()(y).data.cpu().numpy() , axis=1)
        #CM = confusion_matrix(S,C).astype(np.float32)
        #nb_classes = CM.shape[0]
        #train_y = train_y.data.cpu().numpy()
        #for r in range(nb_classes):
            #cluster = np.where(train_y==r)[0]
            #CM[r,:] /= cluster.shape[0]
        #running_conf_mat += CM
        #running_accuracy += np.sum(np.diag(CM))/ nb_classes

        # confusion matrix
        #average_conf_mat = running_conf_mat/ running_total
        #average_accuracy = running_accuracy/ running_total
        average_loss = running_loss/ running_total
        
        
        
    #############
    # output
    #############
    result = {}
    result['final_testing_loss'] = average_loss
    #result['final_acc'] = 100* average_accuracy
    #result['final_CM'] = 100* average_conf_mat
    result['final_batch_time'] = t_stop
    result['nb_param_nn'] = net.nb_param
    result['plot_all_epochs'] = tab_results
    #print(result)
    
    
    return result






#run it
result = our_graph_convnets(task_parameters,net_parameters,opt_parameters)

print("")

print("The final test results and training ephochs")

print("")

print(result)


No existing network to delete


nb of hidden layers= 2
dim of layers (w/ embed dim)= [50, 50, 50]
He resnet active


Graph_OurConvNet(
  (encoder): Embedding(10, 50)
  (gnn_cells): ModuleList(
    (0): OurConvNetcell(
      (drop): Dropout(p=0.05)
      (Ui1): Linear(in_features=50, out_features=50, bias=False)
      (Uj1): Linear(in_features=50, out_features=50, bias=False)
      (Vi1): Linear(in_features=50, out_features=50, bias=False)
      (Vj1): Linear(in_features=50, out_features=50, bias=False)
      (Ui2): Linear(in_features=50, out_features=50, bias=False)
      (Uj2): Linear(in_features=50, out_features=50, bias=False)
      (Vi2): Linear(in_features=50, out_features=50, bias=False)
      (Vj2): Linear(in_features=50, out_features=50, bias=False)
      (bn1): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
      (bn2): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
      (R): Linear(in_features=50, out_featur




iteration= 1600, loss(1000iter)= 1.000, lr= 0.00500, time(1000iter)= 0.008

iteration= 3200, loss(1000iter)= 0.794, lr= 0.00500, time(1000iter)= 0.005

iteration= 4800, loss(1000iter)= 0.756, lr= 0.00500, time(1000iter)= 0.005

iteration= 6400, loss(1000iter)= 0.731, lr= 0.00500, time(1000iter)= 0.004

iteration= 8000, loss(1000iter)= 0.707, lr= 0.00500, time(1000iter)= 0.004

iteration= 9600, loss(1000iter)= 0.675, lr= 0.00500, time(1000iter)= 0.004

iteration= 11200, loss(1000iter)= 0.688, lr= 0.00400, time(1000iter)= 0.004

iteration= 12800, loss(1000iter)= 0.611, lr= 0.00400, time(1000iter)= 0.004

iteration= 14400, loss(1000iter)= 0.618, lr= 0.00320, time(1000iter)= 0.004

iteration= 16000, loss(1000iter)= 0.566, lr= 0.00320, time(1000iter)= 0.004

iteration= 17600, loss(1000iter)= 0.557, lr= 0.00320, time(1000iter)= 0.004

iteration= 19200, loss(1000iter)= 0.552, lr= 0.00256, time(1000iter)= 0.004

iteration= 20800, loss(1000iter)= 0.523, lr= 0.00256, time(1000iter)= 0.004

iter



In [5]:
print(result)

{'final_testing_loss': tensor(0.3677, device='cuda:0'), 'final_batch_time': 0.0041294097900390625, 'nb_param_nn': <bound method Graph_OurConvNet.nb_param of Graph_OurConvNet(
  (encoder): Embedding(10, 50)
  (gnn_cells): ModuleList(
    (0): OurConvNetcell(
      (drop): Dropout(p=0.05)
      (Ui1): Linear(in_features=50, out_features=50, bias=False)
      (Uj1): Linear(in_features=50, out_features=50, bias=False)
      (Vi1): Linear(in_features=50, out_features=50, bias=False)
      (Vj1): Linear(in_features=50, out_features=50, bias=False)
      (Ui2): Linear(in_features=50, out_features=50, bias=False)
      (Uj2): Linear(in_features=50, out_features=50, bias=False)
      (Vi2): Linear(in_features=50, out_features=50, bias=False)
      (Vj2): Linear(in_features=50, out_features=50, bias=False)
      (bn1): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
      (bn2): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
    