In [1]:
from gklearn.utils.graphfiles import loadDataset
import networkx as nx
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np
import pickle as pkl

In [2]:
from __future__ import print_function
import torch
from torch.utils.data import DataLoader, random_split

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
def label_to_color(label):
    if label == 'C':
        return 0.1
    elif label == 'O':
        return 0.8
    
def nodes_to_color_sequence(G):
    return [label_to_color(c[1]['label'][0]) for c in G.nodes(data=True)]

Gs,y = loadDataset('/home/ines/Documents/M2/Stage/stage_ged/Ines/DeepGED/MAO/dataset.ds')
#for e in Gs[13].edges():
#    print(Gs[13][e[0]][e[1]]['bond_type'])
print('edge max label',max(max([[G[e[0]][e[1]]['bond_type'] for e in G.edges()] for G in Gs])))
G1 = Gs[1]
G2 = Gs[9]
print(y[1],y[9])

'''
plt.figure(0)
nx.draw_networkx(G1,with_labels=True,node_color = nodes_to_color_sequence(G1),cmap='autumn')

plt.figure(1)
nx.draw_networkx(G2,with_labels=True,node_color = nodes_to_color_sequence(G2),cmap='autumn')

plt.show()
'''

import extended_label
for g in Gs:
    extended_label.compute_extended_labels(g)
#for v in Gs[10].nodes():
#        print(Gs[10].nodes[v])

#print(nx.to_dict_of_lists(Gs[13]))



#dict={'C':0,'N':1,'O':2}
#A,labels=from_networkx_to_tensor2(Gs[13],dict)
#print(A)
#A1=(A==torch.ones(13,13)).int()
#A2=(A==2*torch.ones(13,13)).int()
#print(A1)
#print(A2)


edge max label 3
0.0 0.0


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import svd
import rings
from svd import iterated_power as compute_major_axis

#torch.autograd.set_detect_anomaly(True)

class Net(nn.Module):
    
        
    def __init__(self,GraphList,normalize=False,node_label='label'):
        super(Net, self).__init__()   
        self.normalize=normalize
        self.node_label=node_label
        dict,self.nb_edge_labels=self.build_node_dictionnary(GraphList)
        self.nb_labels=len(dict)
        print(self.nb_edge_labels)
        self.device='cuda' if torch.cuda.is_available() else 'cpu'
        nb_node_pair_label=self.nb_labels*(self.nb_labels-1)/2.0
        nb_edge_pair_label=int(self.nb_edge_labels*(self.nb_edge_labels-1)/2)
        
        self.node_weighs=nn.Parameter(torch.tensor(1.0/(nb_node_pair_label+nb_edge_pair_label+2))+(1e-3)*torch.rand(int(self.nb_labels*(self.nb_labels-1)/2+1),requires_grad=True,device=self.device)) # all substitution costs+ nodeIns/Del. old version: 0 node subs, 1 nodeIns/Del, 2 : edgeSubs, 3 edgeIns/Del        
        self.edge_weighs=nn.Parameter(torch.tensor(1.0/(nb_node_pair_label+nb_edge_pair_label+2))+(1e-3)*torch.rand(nb_edge_pair_label+1,requires_grad=True,device=self.device)) #edgeIns/Del
        
        self.card=torch.tensor([G.order() for G in GraphList]).to(self.device)
        card_max=self.card.max()
        self.A=torch.empty((len(GraphList),card_max*card_max),dtype=torch.int,device=self.device)
        self.labels=torch.empty((len(GraphList),card_max),dtype=torch.int,device=self.device)
        print(self.A.shape)
        for k in range(len(GraphList)):
            A,l=self.from_networkx_to_tensor(GraphList[k],dict)             
            self.A[k,0:A.shape[1]]=A[0]
            self.labels[k,0:l.shape[0]]=l
        print('adjacency matrices',self.A)
        print('node labels',self.labels)
        print('order of the graphs',self.card)
        
    def forward(self, input):        
        ged=torch.zeros(len(input)).to(self.device) 
        node_costs,nodeInsDel,edge_costs,edgeInsDel=self.from_weighs_to_costs()
        
            
        
        #print('weighs:',self.weighs.device,'device:',self.device,'card:',self.card.device,'A:',self.A.device,'labels:',self.labels.device)
        for k in range(len(input)):            
            g1=input[k][0]
            g2=input[k][1]
            n=self.card[g1]
            m=self.card[g2]
            
            self.ring_g,self.ring_h = rings.build_rings(g1,edgeInsDel.size()), rings.build_rings(g2,edgeInsDel.size()) 
            
            C=self.construct_cost_matrix(g1,g2,node_costs,edge_costs,nodeInsDel,edgeInsDel)      
            #S=self.mapping_from_similarity(C,n,m)
            #S=self.mapping_from_cost(C,n,m)   
            #S=self.new_mapping_from_cost(C,n,m,g1,g2,node_costs,edge_costs,nodeInsDel,edgeInsDel)
            S=self.mapping_from_cost_sans_FW(n,m,g1,g2,node_costs,edge_costs,nodeInsDel,edgeInsDel)
            
            v=torch.flatten(S)
            
            normalize_factor=1.0
            if self.normalize:
                nb_edge1=(self.A[g1][0:n*n] != torch.zeros(n*n,device=self.device)).int().sum()
                nb_edge2=(self.A[g2][0:m*m] != torch.zeros(m*m,device=self.device)).int().sum()
                normalize_factor=nodeInsDel*(n+m)+edgeInsDel*(nb_edge1+nb_edge2)
            c=torch.diag(C)
            D=C-torch.eye(C.shape[0],device=self.device)*c
            ged[k]=(.5*v.T@D@v+c.T@v)/normalize_factor
        max=torch.max(ged)
        min=torch.min(ged)
        ged=(ged-min)/(max-min)
        
        return ged
    
    def from_weighs_to_costs(self):
        
        #cn=torch.exp(self.node_weighs)
        #ce=torch.exp(self.edge_weighs)
        cn=self.node_weighs*self.node_weighs
        ce=self.edge_weighs*self.edge_weighs
        total_cost=cn.sum()+ce.sum()
        cn=cn/total_cost #/max
        ce=ce/total_cost
        edgeInsDel=ce[-1]

        node_costs=torch.zeros((self.nb_labels,self.nb_labels),device=self.device)
        upper_part=torch.triu_indices(node_costs.shape[0],node_costs.shape[1],offset=1,device=self.device)        
        node_costs[upper_part[0],upper_part[1]]=cn[0:-1]
        node_costs=node_costs+node_costs.T

        if self.nb_edge_labels>1:
            edge_costs=torch.zeros((self.nb_edge_labels,self.nb_edge_labels),device=self.device)
            upper_part=torch.triu_indices(edge_costs.shape[0],edge_costs.shape[1],offset=1,device=self.device)        
            edge_costs[upper_part[0],upper_part[1]]=ce[0:-1]
            edge_costs=edge_costs+edge_costs.T
        else:
            edge_costs=torch.zeros(0,device=self.device)
        
        return node_costs,cn[-1],edge_costs,edgeInsDel
    
    def build_node_dictionnary(self,GraphList):
        #extraction de tous les labels d'atomes
        node_labels=[]
        for G in Gs:
            for v in nx.nodes(G):
                if not G.nodes[v][self.node_label][0] in node_labels:
                    node_labels.append(G.nodes[v][self.node_label][0])
        node_labels.sort()
        #extraction d'un dictionnaire permettant de numéroter chaque label par un numéro.
        dict={}
        k=0
        for label in node_labels:
            dict[label]=k
            k=k+1
        print(node_labels)
        print(dict,len(dict))
    
        return dict,max(max([[int(G[e[0]][e[1]]['bond_type']) for e in G.edges()] for G in GraphList]))
    
    def from_networkx_to_tensor(self,G,dict):    
        A=torch.tensor(nx.to_scipy_sparse_matrix(G,dtype=int,weight='bond_type').todense(),dtype=torch.int)        
        lab=[dict[G.nodes[v][self.node_label][0]] for v in nx.nodes(G)]
   
        return (A.view(1,A.shape[0]*A.shape[1]),torch.tensor(lab))

    def construct_cost_matrix(self,g1,g2,node_costs,edge_costs,nodeInsDel,edgeInsDel):
        n = self.card[g1].item()
        m = self.card[g2].item()
        
        A1=torch.zeros((n+1,n+1),dtype=torch.int,device=self.device)
        A1[0:n,0:n]=self.A[g1][0:n*n].view(n,n)
        A2=torch.zeros((m+1,m+1),dtype=torch.int,device=self.device)
        A2[0:m,0:m]=self.A[g2][0:m*m].view(m,m)
        
        
         # costs: 0 node subs, 1 nodeIns/Del, 2 : edgeSubs, 3 edgeIns/Del
        
        #C=cost[3]*torch.cat([torch.cat([C12[l][k] for k in range(n+1)],1) for l in range(n+1)])
        #Pas bien sur mais cela semble fonctionner.
        C=edgeInsDel*self.matrix_edgeInsDel(A1,A2)
        if self.nb_edge_labels>1:
            for k in range(self.nb_edge_labels):
                for l in range(self.nb_edge_labels):
                    if k != l:
#                    C.add_(self.matrix_edgeSubst(A1,A2,k+1,l+1),alpha=edge_costs[k][l])
                        C=C+edge_costs[k][l]*self.matrix_edgeSubst(A1,A2,k+1,l+1)
        #C=cost[3]*torch.tensor(np.array([ [  k!=l and A1[k//(m+1),l//(m+1)]^A2[k%(m+1),l%(m+1)] for k in range((n+1)*(m+1))] for l in range((n+1)*(m+1))]),device=self.device)        

        l1=self.labels[g1][0:n]
        l2=self.labels[g2][0:m]
        D=torch.zeros((n+1)*(m+1),device=self.device)
        D[n*(m+1):]=nodeInsDel
        D[n*(m+1)+m]=0
        D[[i*(m+1)+m for i in range(n)]]=nodeInsDel
        D[[k for k in range(n*(m+1)) if k%(m+1) != m]]=torch.tensor([node_costs[l1[k//(m+1)],l2[k%(m+1)]] for k in range(n*(m+1)) if k%(m+1) != m],device=self.device )
        mask = torch.diag(torch.ones_like(D))
        C=mask*torch.diag(D) + (1. - mask)*C
        
        #C[range(len(C)),range(len(C))]=D
      
        return C
    def matrix_edgeInsDel(self,A1,A2):
        Abin1=(A1!=torch.zeros((A1.shape[0],A1.shape[1]),device=self.device))
        Abin2=(A2!=torch.zeros((A2.shape[0],A2.shape[1]),device=self.device))
        C1=torch.einsum('ij,kl->ijkl',torch.logical_not(Abin1),Abin2)
        C2=torch.einsum('ij,kl->ijkl',Abin1,torch.logical_not(Abin2))
        C12=torch.logical_or(C1,C2).int()
    
        return torch.cat(torch.unbind(torch.cat(torch.unbind(C12,1),1),0),1)

    def matrix_edgeSubst(self,A1,A2,lab1,lab2):
        Abin1=(A1==lab1*torch.ones((A1.shape[0],A1.shape[1]),device=self.device)).int()
        Abin2=(A2==lab2*torch.ones((A2.shape[0],A2.shape[1]),device=self.device)).int()
        C=torch.einsum('ij,kl->ijkl',Abin1,Abin2)
        
        return torch.cat(torch.unbind(torch.cat(torch.unbind(C,1),1),0),1)
    
    def similarity_from_cost(self,C):
        N=C.shape[0]
             
        #return (torch.norm(C,p='fro')*torch.eye(N,device=self.device) -C)
        return (C.max()*torch.eye(N,device=self.device) -C)
    
    def lsape_populate_instance(self,first_graph,second_graph,average_node_cost, average_edge_cost,alpha,lbda):       #ring_g, ring_h come from global ring with all graphs in so ring_g = rings['g'] and ring_h = rings['h']
        g,h = Gs[first_graph], Gs[second_graph]
        self.average_cost =[average_node_cost, average_edge_cost]
        self.first_graph, self.second_graph = first_graph,second_graph
        
        node_costs,nodeInsDel,edge_costs,edgeInsDel=self.from_weighs_to_costs()

        lsape_instance = [[0 for _ in range(len(g) + 1)] for __ in range(len(h) + 1)]
        for g_node_index in range(len(g) + 1):
            for h_node_index in range(len(h) + 1):
                lsape_instance[h_node_index][g_node_index] = rings.compute_ring_distance(g,h,self.ring_g,self.ring_h,g_node_index,h_node_index,alpha,lbda,node_costs,nodeInsDel,edge_costs,edgeInsDel,first_graph,second_graph)
        for i in lsape_instance :
            i = torch.as_tensor(i)
        lsape_instance = torch.as_tensor(lsape_instance)
        #print(type(lsape_instance))
        return lsape_instance
    
  
    def mapping_from_cost_sans_FW(self,n,m,g1,g2,node_costs,edge_costs,nodeInsDel,edgeInsDel): 
        c_0 =self.lsape_populate_instance(g1,g2,node_costs,edge_costs,nodeInsDel,edgeInsDel)
        x0=svd.eps_assigment_from_mapping(torch.exp(-c_0),10).view((n+1)*(m+1),1)
        return x0
    
    def new_mapping_from_cost(self,C,n,m,g1,g2,node_costs,edge_costs,nodeInsDel,edgeInsDel): 
        c=torch.diag(C)       
        c_0 =self.lsape_populate_instance(g1,g2,node_costs,edge_costs,nodeInsDel,edgeInsDel)
        D=C-torch.eye(C.shape[0],device=self.device)*c
        x0=svd.eps_assigment_from_mapping(torch.exp(-c_0),10).view((n+1)*(m+1),1)
        return svd.franck_wolfe(x0,D,c,5,15,n,m)
    
    
    def mapping_from_cost(self,C,n,m):
        c=torch.diag(C)
        D=C-torch.eye(C.shape[0],device=self.device)*c
        x0=svd.eps_assigment_from_mapping(torch.exp(-.5*c.view(n+1,m+1)),10).view((n+1)*(m+1),1)
        x=svd.franck_wolfe(x0,D,c,5,10,n,m)
        def print_grad(grad):
            if(grad.norm()!= 0.0):
                print(grad)
        
#        x.register_hook(print_grad)
        return x

print('Gs=',len(Gs))
model = Net(Gs,normalize=True,node_label='extended_label')

params = list(model.parameters())
print(len(params))
print(params[0])
#print(model(input))
print(max([G.order() for G in Gs]),len(Gs))
print('toto')

Gs= 68
['C_1C', 'C_1C1C1N', 'C_1C1C2C', 'C_1C1N', 'C_1C1N2C', 'C_1C1O', 'C_1C1O2C', 'C_1C2C', 'C_1C3C', 'C_1N', 'C_1O', 'C_2C', 'C_2C2C', 'C_3C', 'N_1C', 'N_1C1C', 'N_1C1C1C', 'O_1C', 'O_1C1C']
{'C_1C': 0, 'C_1C1C1N': 1, 'C_1C1C2C': 2, 'C_1C1N': 3, 'C_1C1N2C': 4, 'C_1C1O': 5, 'C_1C1O2C': 6, 'C_1C2C': 7, 'C_1C3C': 8, 'C_1N': 9, 'C_1O': 10, 'C_2C': 11, 'C_2C2C': 12, 'C_3C': 13, 'N_1C': 14, 'N_1C1C': 15, 'N_1C1C1C': 16, 'O_1C': 17, 'O_1C1C': 18} 19
3
torch.Size([68, 729])
adjacency matrices tensor([[         0,          1,          0,  ...,          1,      32539,
                165],
        [         0,          1,          0,  ...,      32539,   78283592,
                  0],
        [         0,          1,          0,  ..., -652004368,      32539,
         2115235952],
        ...,
        [         0,          1,          0,  ...,          0,          0,
                  0],
        [         0,          1,          0,  ...,          0,          0,
                  0],
        [

In [5]:
import itertools

nb=len(Gs)
class1=torch.tensor([k for k in range(len(y)) if y[k]==1])
class2=torch.tensor([k for k in range(len(y)) if y[k]==0])

nb_class1=12
nb_class2=int((nb_class1-1)/2)
train_size=nb_class1+nb_class2
#train_size=20

#if train_size % 2 == 0:
#    nb_class1=int(train_size/2)
#    nb_class2=int(train_size/2)
#else:
#    nb_class1=int(train_size/2)+1
#    nb_class2=int(train_size/2)
    
print((torch.abs(10000*torch.randn(nb_class1)).int()%class1.size()[0]).long())
random_class1=class1[(torch.abs(10000*torch.randn(nb_class1)).int()%class1.size()[0]).long()]
random_class2=class2[(torch.abs(10000*torch.randn(nb_class2)).int()%class2.size()[0]).long()]
train_graphs=torch.cat((random_class1,random_class2),0)
print('train graphs:',train_graphs)


couples=torch.triu_indices(train_size,train_size,offset=1)
print('couples=',couples)
print('nb_class1/nb_class2=',nb_class1,nb_class2)
#combinations=itertools.combinations(range(nb),2)

nb_elt=int(nb_class1*(nb_class1+2*nb_class2-1)/2)
print('couples restreints:',couples[:,0:nb_elt])

#nb_elt=int(train_size*(train_size-1)/2)
data=torch.empty((nb_elt,2),dtype=torch.int)
yt=torch.ones(nb_elt)
print('old_size, new size=',nb_elt,.5*nb_class1*(nb_class1+2*nb_class2-1))
data[0:nb_elt,0]=train_graphs[couples[0,0:nb_elt]]
data[0:nb_elt,1]=train_graphs[couples[1,0:nb_elt]]


#data[0:nb_elt,0]=train_graphs[couples[0]]
#data[0:nb_elt,1]=train_graphs[couples[1]]
print(nb_elt)
#couples=[]
for k in range(nb_elt):
    if (y[data[k][0]]!=y[data[k][1]]):
        yt[k]=-1.0        

print('data=',data,len(data))

#print(couples[1:2])

torch.cuda.empty_cache()
if torch.cuda.is_available():
    device = torch.device("cuda:0")          # a CUDA device object    


tensor([20, 14, 14, 29, 28, 27, 11,  1, 23, 16, 11, 14])
train graphs: tensor([60,  4, 19, 27, 66, 25, 36, 59,  7, 23, 14, 45, 40, 57, 47, 35,  0])
couples= tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,
          2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,
          3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,
          5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,
          6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,
          9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11,
         12, 12, 12, 12, 13, 13, 13, 14, 14, 15],
        [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,  2,  3,
          4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,  3,  4,  5,  6,  7,
          8,  9, 10, 11, 12, 13, 14, 15, 16,  4,  5,  6,  7,  8,

In [6]:
#[train_D, valid_D,train_L,valid_L]= train_test_split(Gs,y, test_size=0.25,train_size=0.75, shuffle=True) #, stratify=yt)
  


In [7]:
#[train_D, valid_D,train_L,valid_L]= train_test_split(Gs,y, test_size=0.25,train_size=0.75, shuffle=True) #, stratify=yt)
  

def creating_couples_after_splitting(train_D, valid_D,train_L,valid_L):
    couples_train=[]
    couples_test_train=[]
    for i,g1_idx in enumerate(train_D): 
        for j,g2_idx in enumerate(train_D):
            n=g1_idx.order()
            m=g2_idx.order()
            #print([n,m])
            couples_train.append([n,m])
    yt=np.ones(len(couples_train))
    #print(yt,len(yt))
    for k in couples_train:
        if (y[k[0]]!=y[k[1]]):
            yt[k]=-1.0  
    for i,g1_idx in enumerate(valid_D):
        for j,g2_idx in enumerate(train_D):
            n=g1_idx.order()
            m=g2_idx.order()
            couples_test_train.append([n,m])
            
    yv=np.ones(len(couples_test_train))
    #print(yt,len(yt))
    for k in couples_test_train:
        if (y[k[0]]!=y[k[1]]):
            yv[k]=-1.0
            
    return torch.tensor(couples_train),yt,torch.tensor(couples_test_train),yv
    


In [8]:
y[11]



0.0

In [9]:
import random

def different_sets(my_train_D,my_valid_D):
    #my_list=[i for i in range(68)] 
    cp=my_valid_D

    for i in range(len(my_valid_D)):
        if my_valid_D[i] in my_train_D:
            tmp=random.choice(Gs)
            if tmp not in my_train_D: 
                cp[i]=tmp
        #else: cp[i]=my_valid_D[i]
    my_valid_D=cp
    
    return my_train_D,my_valid_D

In [10]:
def different_sets_ints(my_train_D,my_valid_D):
    my_list=[i for i in range(68)] 
    cp=my_valid_D

    for i in range(len(my_valid_D)):
        if my_valid_D[i] in my_train_D:
            tmp=random.choice(my_list)
            if tmp not in my_train_D: 
                cp[i]=tmp
        #else: cp[i]=my_valid_D[i]
    my_valid_D=cp
    
    return my_train_D,my_valid_D

In [11]:
Gs[12]

<networkx.classes.graph.Graph at 0x7f1b292ea280>

In [12]:
def getting_data(train_D, valid_D,train_L,valid_L):
    my_train_D=[]
    my_valid_D=[]
    for i,g1_idx in enumerate(train_D): 
        n=g1_idx.order()
        my_train_D.append(n)
    for i,g1_idx in enumerate(valid_D): 
        n=g1_idx.order()
        my_valid_D.append(n)
        
    my_train_D,my_valid_D=different_sets_ints(my_train_D,my_valid_D)
    
    return my_train_D,my_valid_D

#my_train_D,my_valid_D,train_L,valid_L=getting_data(train_D, valid_D,train_L,valid_L)


In [16]:
from sklearn.model_selection import train_test_split
from sklearn import datasets
from torch.utils.data import DataLoader, random_split, TensorDataset

def splitting(data):
    
    #[train_D, valid_D,train_L,valid_L]= train_test_split(data,yt, test_size=0.25,train_size=0.75, shuffle=True) #, stratify=yt)
    [train_D, valid_D,train_L,valid_L]= train_test_split(Gs,y, test_size=0.25,train_size=0.75, shuffle=True, stratify=y)
    train_D, valid_D=different_sets(train_D,valid_D)
    
    couples_train,yt,couples_test_train,yv = creating_couples_after_splitting(train_D, valid_D,train_L,valid_L)
    yt=torch.tensor(yt)
    yv=torch.tensor(yv)
    #DatasetTrain = TensorDataset(train_D, train_L)
    DatasetTrain = TensorDataset(couples_train,yt) 
    
    #DatasetValid=TensorDataset(valid_D, valid_L)
    DatasetValid=TensorDataset(couples_test_train, yv)

    trainloader=torch.utils.data.DataLoader(DatasetTrain,batch_size=len(couples_train),shuffle=True,drop_last=True, num_workers=0)

    validationloader=torch.utils.data.DataLoader(DatasetValid, batch_size=128, drop_last=True,num_workers=0)

    '''
    print(len(train_D), len(valid_D))
    print(len(train_L), len(valid_L))
    print(len(trainloader),len(validationloader))
    for i,j in validationloader:
        print('i : ',i,'\n j : ',j)
    '''
    
    print(len(trainloader),len(validationloader))
    print(len(trainloader),len(validationloader))
    
    train_D,valid_D=getting_data(train_D, valid_D,train_L,valid_L)
    torch.save(train_D, 'train_D', pickle_module=pkl) 
    torch.save(valid_D, 'valid_D', pickle_module=pkl) 
    torch.save(train_L, 'train_L', pickle_module=pkl) 
    torch.save(valid_L, 'valid_L', pickle_module=pkl) 
    
    return trainloader,validationloader

In [17]:
#if torch.cuda.device_count() > 1:
#  print("Let's use", torch.cuda.device_count(), "GPUs!")
  # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
#  model = nn.DataParallel(model)
from triangular_losses import TriangularConstraint as triangular_constraint

model.to(device)
def classification(model,data,yt,nb_iter):
    
    trainloader,validationloader=splitting(data)
    criterion = torch.nn.HingeEmbeddingLoss(margin=1.0,reduction='mean')
    criterionTri=triangular_constraint()
    optimizer = torch.optim.Adam(model.parameters()) #, lr=1e-3
    #print(device)

    #torch.cat((same_class[0:20],diff_class[0:20]),0).to(device)
    whole_input=data.to(device)
    target=yt.to(device) 
    #torch.ones(40,device=device)
    #target[20:]=-1.0
    #target=(yt[0:20]).to(device)
    InsDel=np.empty((nb_iter,2))
    node_costs,nodeInsDel,edge_costs,edgeInsDel=model.from_weighs_to_costs()
    nodeSub=np.empty((nb_iter,int(node_costs.shape[0]*(node_costs.shape[0]-1)/2)))
    edgeSub=np.empty((nb_iter,int(edge_costs.shape[0]*(edge_costs.shape[0]-1)/2)))
    loss_plt=np.empty(nb_iter)
    loss_train_plt=np.empty(nb_iter)
    loss_valid_plt=np.empty(nb_iter)
    min_valid_loss = np.inf
    iter_min_valid_loss = 0
    
    for t in range(nb_iter):    
        train_loss = 0.0
        valid_loss = 0.0
        tmp=np.inf
        for train_data,train_labels in trainloader:
            optimizer.zero_grad()
            
            inputt=train_data.to(device)
            
            # Forward pass: Compute predicted y by passing data to the model
            y_pred = model(whole_input).to(device)

            # Compute and print loss
            loss = criterion(y_pred, target).to(device)
            node_costs,nodeInsDel,edge_costs,edgeInsDel=model.from_weighs_to_costs()
            triangularInq=criterionTri(node_costs,nodeInsDel,edge_costs,edgeInsDel)
            loss=loss*(1+triangularInq)
            loss.to(device)
            # Zero gradients, perform a backward pass, and update the weights.
            #optimizer.zero_grad() 
            loss.backward()
            optimizer.step()
            print('loss.item of the train = ', t, loss.item())
            train_loss =+ loss.item() #* train_data.size(0) 
            if (loss.item()<tmp): tmp=loss.item()
        loss_plt[t]=loss.item()  
        loss_train_plt[t]=train_loss /len(trainloader)
        #loss_plt[t]=tmp
            
        if t % 100 == 99 or t==0:   
            print('ged=',y_pred*target)  #train_labels
            print('Distances: ',y_pred)
            print('Loss Triangular:',triangularInq.item())
            print('node_costs :')
            print(node_costs)
            print('nodeInsDel:',nodeInsDel.item())
            print('edge_costs :')
            print(edge_costs)
            print('edgeInsDel:',edgeInsDel.item())
            
            
            
        for valid_data,valid_labels in validationloader:
            
            inputt=valid_data.to(device)
            y_pred = model(inputt).to(device)
            # Compute and print loss
            loss = criterion(y_pred, valid_labels).to(device)    
            loss.to(device)
            
            print('loss.item of the valid = ', t, loss.item())  
            valid_loss = valid_loss + loss.item() #* valid_data.size(0)
            
        loss_valid_plt[t]=valid_loss / len(validationloader)   
        
        InsDel[t][0]=nodeInsDel.item()
        InsDel[t][1]=edgeInsDel.item()
        
        
        k=0
        for p in range(node_costs.shape[0]):
            for q in range(p+1,node_costs.shape[0]):
                nodeSub[t][k]=node_costs[p][q]
                k=k+1
        k=0
        for p in range(edge_costs.shape[0]):
            for q in range(p+1,edge_costs.shape[0]):
                edgeSub[t][k]=edge_costs[p][q]
                k=k+1
        
            
        print(f'Iteration {t+1} \t\t Training Loss: {train_loss / len(trainloader)} \t\t Validation Loss: {valid_loss/len(validationloader)}')
        if min_valid_loss > valid_loss:
            print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss:.6f})')
            min_valid_loss = valid_loss
            iter_min_valid_loss = t
            nodeSub_min = node_costs
            edgeSub_min = edge_costs
            nodeInsDel_min = nodeInsDel
            edgeInsDel_min = edgeInsDel
            
            
    print('iter and min_valid_loss = ',iter_min_valid_loss, min_valid_loss)
    '''
    nodeInsDel_min = InsDel[iter_min_valid_loss][0]
    edgeInsDel_min = InsDel[iter_min_valid_loss][1]
    nodeSub_min = nodeSub[iter_min_valid_loss]
    edgeSub_min = edgeSub[iter_min_valid_loss]
    '''
    print(' Min cost for nodeInsDel = ', nodeInsDel_min)
    print(' Min cost for edgeInsDel = ', edgeInsDel_min)
    print(' Min cost for nodeSub = ', nodeSub_min)
    print(' Min cost for edgeSub = ', edgeSub_min)
    torch.save(nodeInsDel_min, 'nodeInsDel_min', pickle_module=pkl) 
    torch.save(edgeInsDel_min, 'edgeInsDel_min', pickle_module=pkl) 
    torch.save(nodeSub_min, 'nodeSub_min', pickle_module=pkl) 
    torch.save(edgeSub_min, 'edgeSub_min', pickle_module=pkl)
    return InsDel,nodeSub,edgeSub,loss_plt,loss_valid_plt,loss_train_plt



In [18]:
nb_iter=100
InsDel, nodeSub,edgeSub,loss_plt,loss_valid_plt,loss_train_plt=classification(model,data,yt,nb_iter)
plt.figure(0)
plt.plot(InsDel[0:nb_iter,0],label="node")
plt.plot(InsDel[0:nb_iter,1],label="edge")
plt.title('Node/Edge insertion/deletion costs')
plt.legend()
plt.figure(1)
for k in range(nodeSub.shape[1]):
    plt.plot(nodeSub[0:nb_iter,k])
plt.title('Node Substitutions costs')
plt.figure(2)
for k in range(edgeSub.shape[1]):
    plt.plot(edgeSub[0:nb_iter,k])
plt.title('Edge Substitutions costs')
plt.figure(3)
plt.plot(loss_plt)
plt.title('Evolution of the train loss (loss_plt)')

plt.figure(4)
plt.plot(loss_valid_plt)
plt.title('Evolution of the valid loss')
'''
plt.figure(5)
plt.plot(loss_train_plt)
plt.title('Evolution of the loss_train_plt')
'''
plt.show()
plt.close()

1 6
1 6
loss.item of the train =  0 0.3707982003688812
ged= tensor([ 0.1730,  0.1821,  0.4786,  0.0155,  0.2515,  0.0271,  0.0280,  0.1985,
         0.2855,  0.1730,  0.2527, -0.5683, -0.5647, -0.4285, -0.3781, -0.3958,
         0.2351,  0.6524,  0.2649,  0.3830,  0.2820,  0.2799,  0.2542,  0.2852,
         0.0000,  0.3740, -0.8625, -0.8641, -0.5820, -0.5229, -0.2655,  0.5676,
         0.2204,  0.2773,  0.2409,  0.2486,  0.0029,  0.3307,  0.1529,  0.2779,
        -0.6091, -0.6056, -0.5165, -0.4322, -0.3459,  0.2638,  0.2125,  0.2674,
         0.2530,  0.3150,  0.2132,  0.3888,  0.2011, -0.2597, -0.2695, -0.0199,
        -0.2028, -1.0000,  0.2509,  0.0271,  0.0280,  0.1925,  0.2815,  0.1704,
         0.2527, -0.5655, -0.5619, -0.4180, -0.3781, -0.3946,  0.1967,  0.2029,
         0.1945,  0.2355,  0.2146,  0.0264, -0.4665, -0.4705, -0.3672, -0.3238,
        -0.5402,  0.0327,  0.1906,  0.2798,  0.1850,  0.2585, -0.5997, -0.5991,
        -0.4307, -0.3960, -0.3953,  0.1888,  0.2993,  0.1846

loss.item of the train =  5 0.3529166281223297
loss.item of the valid =  5 0.3509775698184967
loss.item of the valid =  5 0.38898828625679016
loss.item of the valid =  5 0.3509843647480011
loss.item of the valid =  5 0.29665690660476685
loss.item of the valid =  5 0.3224833011627197
loss.item of the valid =  5 0.4702318012714386
Iteration 6 		 Training Loss: 0.3529166281223297 		 Validation Loss: 0.36338703831036884
loss.item of the train =  6 0.35251033306121826
loss.item of the valid =  6 0.3520623743534088
loss.item of the valid =  6 0.3898161053657532
loss.item of the valid =  6 0.3519844710826874
loss.item of the valid =  6 0.29715192317962646
loss.item of the valid =  6 0.3231700360774994
loss.item of the valid =  6 0.472003310918808
Iteration 7 		 Training Loss: 0.35251033306121826 		 Validation Loss: 0.3643647034962972
loss.item of the train =  7 0.3523334264755249
loss.item of the valid =  7 0.35301119089126587
loss.item of the valid =  7 0.3912966549396515
loss.item of the va

loss.item of the valid =  24 0.39017343521118164
loss.item of the valid =  24 0.3539820611476898
loss.item of the valid =  24 0.2979559600353241
loss.item of the valid =  24 0.3232364356517792
loss.item of the valid =  24 0.4731743037700653
Iteration 25 		 Training Loss: 0.3528459072113037 		 Validation Loss: 0.3650108774503072
loss.item of the train =  25 0.3527354300022125
loss.item of the valid =  25 0.35173794627189636
loss.item of the valid =  25 0.3903728425502777
loss.item of the valid =  25 0.353945255279541
loss.item of the valid =  25 0.2978944182395935
loss.item of the valid =  25 0.32333728671073914
loss.item of the valid =  25 0.47355926036834717
Iteration 26 		 Training Loss: 0.3527354300022125 		 Validation Loss: 0.3651411682367325
loss.item of the train =  26 0.35263559222221375
loss.item of the valid =  26 0.3519977331161499
loss.item of the valid =  26 0.39053773880004883
loss.item of the valid =  26 0.35394299030303955
loss.item of the valid =  26 0.29786205291748047

loss.item of the valid =  43 0.35137632489204407
loss.item of the valid =  43 0.2971450984477997
loss.item of the valid =  43 0.322230726480484
loss.item of the valid =  43 0.4710312783718109
Iteration 44 		 Training Loss: 0.35215723514556885 		 Validation Loss: 0.3637576500574748
loss.item of the train =  44 0.3523136079311371
loss.item of the valid =  44 0.3512944281101227
loss.item of the valid =  44 0.3886096179485321
loss.item of the valid =  44 0.3509925901889801
loss.item of the valid =  44 0.2971130609512329
loss.item of the valid =  44 0.3218202292919159
loss.item of the valid =  44 0.47024717926979065
Iteration 45 		 Training Loss: 0.3523136079311371 		 Validation Loss: 0.3633461842934291
loss.item of the train =  45 0.3524223268032074
loss.item of the valid =  45 0.35101643204689026
loss.item of the valid =  45 0.3882302939891815
loss.item of the valid =  45 0.350691556930542
loss.item of the valid =  45 0.2970832884311676
loss.item of the valid =  45 0.3215077519416809
loss

loss.item of the valid =  62 0.32269221544265747
loss.item of the valid =  62 0.47194889187812805
Iteration 63 		 Training Loss: 0.35224470496177673 		 Validation Loss: 0.3642480621735255
loss.item of the train =  63 0.3521175682544708
loss.item of the valid =  63 0.352382630109787
loss.item of the valid =  63 0.39011192321777344
loss.item of the valid =  63 0.35225197672843933
loss.item of the valid =  63 0.29727447032928467
loss.item of the valid =  63 0.32309022545814514
loss.item of the valid =  63 0.47268030047416687
Iteration 64 		 Training Loss: 0.3521175682544708 		 Validation Loss: 0.36463192105293274
loss.item of the train =  64 0.3519856035709381
loss.item of the valid =  64 0.35273614525794983
loss.item of the valid =  64 0.3908405601978302
loss.item of the valid =  64 0.3526584804058075
loss.item of the valid =  64 0.29733747243881226
loss.item of the valid =  64 0.3235118091106415
loss.item of the valid =  64 0.4734836518764496
Iteration 65 		 Training Loss: 0.35198560357

loss.item of the valid =  81 0.4713403582572937
Iteration 82 		 Training Loss: 0.352275550365448 		 Validation Loss: 0.36388420065244037
loss.item of the train =  82 0.3522289991378784
loss.item of the valid =  82 0.35189563035964966
loss.item of the valid =  82 0.38939008116722107
loss.item of the valid =  82 0.35165831446647644
loss.item of the valid =  82 0.2971841096878052
loss.item of the valid =  82 0.32249167561531067
loss.item of the valid =  82 0.4716228246688843
Iteration 83 		 Training Loss: 0.3522289991378784 		 Validation Loss: 0.36404043932755786
loss.item of the train =  83 0.35217487812042236
loss.item of the valid =  83 0.35206544399261475
loss.item of the valid =  83 0.3896031081676483
loss.item of the valid =  83 0.3518378436565399
loss.item of the valid =  83 0.2972147464752197
loss.item of the valid =  83 0.3226996660232544
loss.item of the valid =  83 0.4719713032245636
Iteration 84 		 Training Loss: 0.35217487812042236 		 Validation Loss: 0.36423201858997345
loss

loss.item of the valid =  99 0.3515052795410156
loss.item of the valid =  99 0.3889026641845703
loss.item of the valid =  99 0.35125741362571716
loss.item of the valid =  99 0.2971002161502838
loss.item of the valid =  99 0.32206645607948303
loss.item of the valid =  99 0.4707724153995514
Iteration 100 		 Training Loss: 0.35226815938949585 		 Validation Loss: 0.3636007408301036
iter and min_valid_loss =  0 2.1007999181747437
 Min cost for nodeInsDel =  tensor(0.0055, grad_fn=<SelectBackward>)
 Min cost for edgeInsDel =  tensor(0.0053, grad_fn=<SelectBackward>)
 Min cost for nodeSub =  tensor([[0.0000, 0.0062, 0.0056, 0.0058, 0.0066, 0.0061, 0.0059, 0.0061, 0.0050,
         0.0052, 0.0057, 0.0049, 0.0056, 0.0056, 0.0060, 0.0050, 0.0052, 0.0050,
         0.0062],
        [0.0062, 0.0000, 0.0055, 0.0063, 0.0061, 0.0049, 0.0060, 0.0053, 0.0065,
         0.0065, 0.0064, 0.0052, 0.0054, 0.0050, 0.0048, 0.0057, 0.0053, 0.0065,
         0.0065],
        [0.0056, 0.0055, 0.0000, 0.0056, 0.0061,

In [None]:
A=torch.tensor(nx.to_scipy_sparse_matrix(Gs[0],dtype=int,weight='bond_type').todense(),dtype=torch.int) 
print(A)

In [None]:
plt.plot(loss_plt, label='train loss')
plt.plot(loss_valid_plt, label='valid loss')
plt.title('Train and valid losses')
plt.legend()
plt.show()

In [None]:
card = torch.tensor([G.order() for G in Gs]).to(device)
print(Gs[0].order())

In [None]:
plt.figure(0)
plt.plot(InsDel[0:500,0],label="node")
plt.plot(InsDel[0:500,1],label="edge")
plt.title('Node/Edge insertion/deletion costs')
plt.legend()
plt.figure(1)
for k in range(nodeSub.shape[1]):
    plt.plot(nodeSub[0:500,k])
plt.title('node Substitution costs')
plt.figure(2)
for k in range(edgeSub.shape[1]):
    plt.plot(edgeSub[0:500,k])
plt.title('edge Substitution costs')
plt.show()
plt.close()