In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import optuna
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression

import torch
import torch_geometric.transforms as T

from torch_geometric.data import Data
from torch_geometric.loader import NeighborSampler
from torch.optim import lr_scheduler
from torch_geometric.datasets import Planetoid,WikipediaNetwork,Actor,WebKB

from modules.model import Net
from modules.sampling import SamplerContextMatrix, SamplerRandomWalk, SamplerFactorization, SamplerAPP

In [None]:
# Table of results computed so far. The experiment cells below look rows up
# by (loss, conv, dataset) and skip combinations that are already present.
# 'Unnamed: 0' is the index column written by a previous DataFrame.to_csv call.
analysis = pd.read_csv('../results/data_analysis_realdata.csv').drop(columns='Unnamed: 0')
analysis

# False -> real benchmark datasets; True -> synthetic graphs read from .npy files.
synthetic = False

In [None]:
# benchmark_data_dir: directory holding the synthetic graph_*.npy files.
# help_data: directory used for cached samples and passed to the samplers.
benchmark_data_dir, help_data = "../data_benchmark/", "../data_help/"

In [None]:
def _split_nodes(num_nodes):
    """Split node ids 0..num_nodes-1 into contiguous train/val/test parts.

    The first int(0.7*n + 1) nodes are used for training, the nodes up to
    int(0.8*n + 1) for validation and the remainder for testing.

    Returns:
        (train_indices, val_indices, test_indices,
         train_mask, val_mask, test_mask) where the indices are int64 tensors
        and the masks are boolean tensors of length ``num_nodes``.
    """
    train_end = int(0.7 * num_nodes + 1)
    val_end = int(0.8 * num_nodes + 1)

    # torch.arange keeps an integer dtype even for an empty slice, so an
    # empty split can still be used to index the masks below.
    node_ids = torch.arange(num_nodes)
    train_indices = node_ids[:train_end]
    val_indices = node_ids[train_end:val_end]
    test_indices = node_ids[val_end:]

    train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    val_mask = torch.zeros(num_nodes, dtype=torch.bool)
    test_mask = torch.zeros(num_nodes, dtype=torch.bool)
    train_mask[train_indices] = True
    val_mask[val_indices] = True
    test_mask[test_indices] = True
    return train_indices, val_indices, test_indices, train_mask, val_mask, test_mask


if synthetic:
    # One dataset per combination of generator targets; the tuple itself is
    # the dataset name and is embedded in the .npy file names.
    datasets_names = [
        (l_a_trgt, f_a_trgt, cl_trgt, asp_trgt, a_deg_trgt)
        for l_a_trgt in [0.1, 0.5, 0.9]
        for f_a_trgt in [0.1, 0.5, 0.9]
        for cl_trgt in [0.01, 0.1, 0.2, 0.3, 0.5]
        for asp_trgt in [2, 3, 4, 5, 6, 7]
        for a_deg_trgt in [2, 5, 10, 15, 20, 25, 30, 35, 40]
    ]

    def data_load(name):
        """Load a synthetic graph from ``benchmark_data_dir`` and split its nodes.

        Reads graph_<name>_attr.npy, graph_<name>_edgelist.npy and
        graph_<name>_labels.npy.

        Returns:
            (data, train_indices, val_indices, test_indices,
             train_mask, val_mask, test_mask)
        """
        prefix = f'{benchmark_data_dir}/graph_' + str(name)
        x = torch.tensor(np.load(prefix + '_attr.npy'), dtype=torch.float)
        # The edge list is transposed after loading to form edge_index.
        edge_list = torch.tensor(np.load(prefix + '_edgelist.npy')).t()
        y = torch.tensor(np.load(prefix + '_labels.npy'))
        data = Data(x=x, edge_index=edge_list, y=y)
        return (data, *_split_nodes(len(data.x)))
else:
    datasets_names = ['Cornell', 'Texas', 'Wisconsin', 'Actor', 'Pubmed', 'squirrel']

    def data_load(name):
        """Download/load a real benchmark graph by name and split its nodes.

        Supported names: 'Cora', 'Citeseer', 'Pubmed' (Planetoid), 'Actor',
        'Cornell', 'Texas', 'Wisconsin' (WebKB), 'squirrel', 'chameleon'
        (WikipediaNetwork). Features are row-normalised via NormalizeFeatures.

        Returns:
            (data, train_indices, val_indices, test_indices,
             train_mask, val_mask, test_mask)

        Raises:
            ValueError: if ``name`` is not one of the supported datasets.
        """
        normalize = T.NormalizeFeatures()
        if name in ('Cora', 'Citeseer', 'Pubmed'):
            data = Planetoid(root='/tmp/' + str(name), name=name, transform=normalize)[0]
        elif name == 'Actor':
            data = Actor(root='/tmp/actor', transform=normalize)[0]
        elif name in ('Cornell', 'Texas', 'Wisconsin'):
            data = WebKB(root='/tmp/' + str(name), name=name, transform=normalize)[0]
        elif name in ('squirrel', 'chameleon'):
            data = WikipediaNetwork(root='/tmp/' + str(name), name=name, transform=normalize)[0]
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `data`.
            raise ValueError(f"Unknown dataset name: {name!r}")

        return (data, *_split_nodes(len(data.x)))

In [None]:
import pickle
import os

class Main:
    """Train one model on one dataset with a fixed set of hyperparameters.

    The constructor loads the dataset through ``data_load(name)`` and stores
    the node splits. ``run`` builds the model, trains it for 100 epochs,
    evaluates after every epoch and plots the loss / F1 curves.

    Args:
        name: dataset name forwarded to ``data_load``.
        conv: convolution type; ``'GCN'`` is trained full-batch through
            ``model.inference``, any other value goes through the
            ``NeighborSampler`` loader.
        device: device the data and the model are moved to.
        loss_function: dict describing the loss; the keys ``"flag_tosave"``,
            ``"Name"`` and ``"Sampler"`` are read by this class.
        mode: ``'unsupervised'`` or ``'supervised'``.
    """

    def __init__(self, name, conv, device, loss_function, mode):
        (data, train_indices, val_indices, test_indices,
         train_mask, val_mask, test_mask) = data_load(name)
        self.Conv = conv
        self.device = device
        # x and y are captured before the data object is moved to the device.
        self.x = data.x
        self.y = data.y.squeeze()
        self.data = data.to(device)
        self.loss = loss_function
        self.mode = mode
        self.datasetname = name
        self.train_indices = train_indices
        self.val_indices = val_indices
        self.test_indices = test_indices
        self.train_mask = train_mask
        self.test_mask = test_mask
        self.val_mask = val_mask
        # When True, samples are cached on disk by `sampling`.
        self.flag = self.loss["flag_tosave"]
        super().__init__()

    def sampling(self, Sampler, epoch, nodes, loss):
        """Populate ``self.samples`` on the first epoch; no-op afterwards.

        Args:
            Sampler: object exposing ``sample(nodes)``.
            epoch: current epoch; samples are only (re)built when it is 0.
            nodes: nodes forwarded to ``Sampler.sample``.
            loss: loss dict; ``"Name"`` and, when present, ``"alpha"`` /
                ``"betta"`` are used to build the cache file name.

        Returns:
            None. The result is stored in ``self.samples``.
        """
        if epoch != 0:
            return

        if not self.flag:
            self.samples = Sampler.sample(nodes)
            return

        # Cache file name: <dataset>_samples_<loss>[_alpha_<a> | _betta_<b>].pickle
        name_of_file = self.datasetname + "_samples_" + loss["Name"]
        if "alpha" in loss:
            name_of_file += "_alpha_" + str(loss["alpha"])
        elif "betta" in loss:
            name_of_file += "_betta_" + str(loss["betta"])
        name_of_file += ".pickle"
        cache_path = os.path.join(help_data, name_of_file)

        if os.path.exists(cache_path):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only safe while the cache directory holds files written by this
            # notebook.
            with open(cache_path, 'rb') as f:
                self.samples = pickle.load(f)
        else:
            self.samples = Sampler.sample(nodes)
            with open(cache_path, 'wb') as f:
                pickle.dump(self.samples, f)

    def train(self, model, data, optimizer, Sampler, train_loader, dropout, epoch, loss):
        """Run one optimisation step and return the mean loss.

        Args:
            model: network exposing ``mode``, ``conv``, ``inference``,
                ``forward``, ``loss`` and ``loss_sup``.
            data: graph data object.
            optimizer: optimiser over ``model.parameters()``.
            Sampler: sampler instance forwarded to ``sampling``.
            train_loader: neighbour-sampling loader (used for non-GCN convs
                and for the final normalisation).
            dropout: dropout value forwarded to ``model.inference``.
            epoch: current epoch number.
            loss: loss dict forwarded to ``sampling``.

        Returns:
            The accumulated loss divided by ``len(train_loader)``.

        Raises:
            ValueError: if ``model.mode`` is neither ``'unsupervised'`` nor
                ``'supervised'``.
        """
        model.train()
        total_loss = 0
        optimizer.zero_grad()

        if model.mode == 'unsupervised':
            if model.conv == 'GCN':
                # Training node ids as a flat list of 0-d tensors.
                indices_of_train_data = list(torch.nonzero(self.train_mask).flatten())
                out = model.inference(data.to(self.device), dp=dropout)
                self.sampling(Sampler, epoch, indices_of_train_data, loss)
                total_loss += model.loss(out[self.train_mask], self.samples)
            else:
                for batch_size, n_id, adjs in train_loader:
                    # With a single layer the loader yields one adj, not a list.
                    if len(train_loader.sizes) == 1:
                        adjs = [adjs]
                    adjs = [adj.to(self.device) for adj in adjs]
                    out = model.forward(data.x[n_id.to(self.device)].to(self.device), adjs)
                    # `loss` must stay the config dict here: it is not rebound
                    # to the batch loss tensor any more.
                    self.sampling(Sampler, epoch, n_id[:batch_size], loss)
                    total_loss += model.loss(out, self.samples)
            total_loss.backward()
            optimizer.step()
            return total_loss / len(train_loader)

        if model.mode == 'supervised':
            if model.conv == 'GCN':
                out = model.inference(data.to(self.device), dp=dropout)
                y = self.y.to(self.device)
                total_loss += model.loss_sup(out[self.train_mask], y[self.train_mask])
            else:
                y = self.y.to(self.device)
                for batch_size, n_id, adjs in train_loader:
                    if len(train_loader.sizes) == 1:
                        adjs = [adjs]
                    adjs = [adj.to(self.device) for adj in adjs]
                    out = model.forward(data.x[n_id].to(self.device), adjs)
                    total_loss += model.loss_sup(out, y[n_id[:batch_size]])
            total_loss.backward(retain_graph=True)
            optimizer.step()
            return total_loss / len(train_loader)

        raise ValueError(f"Unknown model mode: {model.mode!r}")

    @torch.no_grad()
    def test(self, model, data, c):
        """Evaluate the model on the train, test and validation masks.

        In supervised mode the prediction is the argmax of the model output.
        In unsupervised mode a LogisticRegression (``C=c``) is fitted on the
        training embeddings and used to predict labels.

        Args:
            model: trained network.
            data: graph data object.
            c: inverse regularisation strength for the logistic regression
                (only used in unsupervised mode).

        Returns:
            (accs_micro, accs_macro): two lists of F1 scores, each ordered
            [train, test, val].

        Raises:
            ValueError: if ``model.mode`` is neither ``'unsupervised'`` nor
                ``'supervised'``.
        """
        model.eval()
        out = model.inference(data.to(self.device))
        self.y = self.y.cpu()
        outputs = out.cpu().detach()

        if model.mode == 'supervised':
            class_pred = outputs.argmax(dim=-1, keepdim=True)

            def predict(mask):
                return class_pred[mask]
        elif model.mode == 'unsupervised':
            clf = LogisticRegression(max_iter=3000, C=c).fit(
                outputs[self.train_mask].numpy(),
                self.y.detach()[self.train_mask].numpy(),
            )

            def predict(mask):
                return clf.predict(outputs[mask].numpy())
        else:
            raise ValueError(f"Unknown model mode: {model.mode!r}")

        accs_micro = []
        accs_macro = []
        for mask in [self.train_mask, self.test_mask, self.val_mask]:
            y_true = self.y.detach()[mask].cpu().numpy()
            # Predict once per split and reuse for both averages.
            y_pred = predict(mask)
            accs_micro.append(f1_score(y_true, y_pred, average='micro'))
            accs_macro.append(f1_score(y_true, y_pred, average='macro'))
        return accs_micro, accs_macro

    @staticmethod
    def _plot_curve(values, title, ylabel):
        """Plot one per-epoch curve with the given title and y-axis label."""
        plt.plot(values)
        plt.title(title)
        plt.xlabel('epoch')
        plt.ylabel(ylabel)
        plt.show()

    def run(self, params):
        """Train for 100 epochs with fixed hyperparameters and plot the curves.

        Args:
            params: dict with the keys ``'hidden_layer'``, ``'out_layer'``,
                ``'dropout'``, ``'size of network, number of convs'``,
                ``'lr'`` and ``'c'``.

        Returns:
            (train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma) from the
            last epoch.
        """
        hidden_layer = params['hidden_layer']
        out_layer = params['out_layer']
        dropout = params['dropout']
        size = params['size of network, number of convs']
        learning_rate = params['lr']
        c = params['c']

        # A single batch containing every training node, full neighbourhoods.
        train_loader = NeighborSampler(
            self.data.edge_index,
            node_idx=self.train_mask,
            batch_size=int(self.train_mask.sum()),
            sizes=[-1] * size,
        )

        Sampler = self.loss["Sampler"]
        LossSampler = Sampler(
            self.datasetname, self.data, device=self.device,
            mask=self.train_mask, loss_info=self.loss, help_dir=help_data,
        )
        model = Net(
            dataset=self.data, mode=self.mode, conv=self.Conv,
            loss_function=self.loss, device=self.device,
            hidden_layer=hidden_layer, out_layer=out_layer,
            num_layers=size, dropout=dropout,
        )
        model.to(self.device)

        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

        losses = []
        train_accs_mi = []
        test_accs_mi = []
        train_accs_ma = []
        test_accs_ma = []
        name_of_plot = 'conv: ' + model.conv + ', mode: ' + model.mode + ', loss from ' + self.loss["Name"]
        print(name_of_plot)
        log = 'Loss: {:.4f}, Epoch: {:03d}, Train acc micro: {:.4f}, Test acc micro: {:.4f},Train acc macro: {:.4f}, Test acc macro: {:.4f}'

        for epoch in range(100):
            print(epoch)
            loss = self.train(model, self.data, optimizer, LossSampler, train_loader, dropout, epoch, self.loss)
            losses.append(loss.detach().cpu())
            # test() takes (model, data, c); the former extra positional
            # argument raised a TypeError.
            [train_acc_mi, test_acc_mi, val_acc_mi], [train_acc_ma, test_acc_ma, val_acc_ma] = self.test(model, self.data, c)
            train_accs_mi.append(train_acc_mi)
            test_accs_mi.append(test_acc_mi)
            train_accs_ma.append(train_acc_ma)
            test_accs_ma.append(test_acc_ma)
            print(log.format(loss, epoch, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma))

        print(log.format(loss, epoch, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma))
        self._plot_curve(losses, name_of_plot + ' loss', 'loss')
        self._plot_curve(test_accs_mi, name_of_plot + ' test f1 micro', 'f1 micro')
        self._plot_curve(test_accs_ma, name_of_plot + ' test f1 macro', 'f1 macro')
        return train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma

In [None]:
class MainOptuna(Main):
    """Hyperparameter search for a (dataset, conv, loss) combination via Optuna.

    Values in the loss dict that are lists are treated as search spaces (see
    ``objective``); every other value is passed through unchanged.
    """

    def objective(self, trial):
        """Train for 50 epochs with sampled hyperparameters and score the result.

        Search-space convention for list values in ``self.loss``:
        a 3-element list is ``[low, high, step]`` for ``suggest_int``,
        a 2-element list is ``[low, high]`` for ``suggest_float``,
        any other length is a list of categorical choices.

        Args:
            trial: the Optuna trial used to sample parameters.

        Returns:
            sqrt(val_f1_micro * val_f1_macro) on the validation mask.
        """
        hidden_layer = trial.suggest_categorical("hidden_layer", [32, 64, 128, 256])
        out_layer = trial.suggest_categorical("out_layer", [32, 64, 128])
        dropout = trial.suggest_float("dropout", 0.0, 0.5, step=0.1)
        size = trial.suggest_categorical("size of network, number of convs", [1, 2, 3])
        learning_rate = trial.suggest_float("lr", 5e-3, 1e-2)
        c = trial.suggest_categorical("c", [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 10, 20, 30, 100])

        # Vary the loss parameters: resolve every search space to one value.
        loss_to_train = {}
        for name in self.loss:
            spec = self.loss[name]
            if not isinstance(spec, list):
                loss_to_train[name] = spec
            elif len(spec) == 3:
                loss_to_train[name] = trial.suggest_int(name, spec[0], spec[1], step=spec[2])
            elif len(spec) == 2:
                loss_to_train[name] = trial.suggest_float(name, spec[0], spec[1])
            else:
                loss_to_train[name] = trial.suggest_categorical(name, spec)

        # NOTE(review): this check runs after the loop, so `name` is the LAST
        # key of self.loss. It therefore only fires for a loss dict whose final
        # key is 'q'. If the p/q clamping was meant to apply whenever 'q' is a
        # search space, this needs to be revisited together with the cells that
        # read best_values['p'] / best_values['q'].
        if name == 'q' and isinstance(self.loss[name], list):
            p_value = trial.suggest_categorical('p', self.loss['p'])
            q_value = trial.suggest_categorical('q', self.loss[name])
            if q_value > 1:
                q_value = 1
            if p_value < q_value:
                p_value = q_value
            loss_to_train['q'] = q_value
            loss_to_train['p'] = p_value

        Sampler = loss_to_train["Sampler"]
        model = Net(
            dataset=self.data, mode=self.mode, conv=self.Conv,
            loss_function=loss_to_train, device=self.device,
            hidden_layer=hidden_layer, out_layer=out_layer,
            num_layers=size, dropout=dropout,
        )
        # A single batch containing every training node, full neighbourhoods.
        train_loader = NeighborSampler(
            self.data.edge_index,
            batch_size=int(self.train_mask.sum()),
            node_idx=self.train_mask,
            sizes=[-1] * size,
        )
        LossSampler = Sampler(
            self.datasetname, self.data, device=self.device,
            mask=self.train_mask, loss_info=loss_to_train, help_dir=help_data,
        )
        model.to(self.device)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

        for epoch in range(50):
            self.train(model, self.data, optimizer, LossSampler, train_loader, dropout, epoch, loss_to_train)

        [train_acc_mi, test_acc_mi, val_acc_mi], [train_acc_ma, test_acc_ma, val_acc_ma] = self.test(model=model, data=self.data, c=c)
        # Geometric mean of the validation micro and macro F1.
        score = np.sqrt(val_acc_mi * val_acc_ma)
        trial.report(score, epoch)
        return score

    def run(self, number_of_trials):
        """Run the Optuna study and return the best trial's parameters.

        Args:
            number_of_trials: number of trials passed to ``study.optimize``.

        Returns:
            dict of the best trial's sampled parameters.
        """
        study = optuna.create_study(
            direction="maximize",
            study_name=self.loss["Name"] + " loss," + str(self.Conv) + " conv",
        )
        study.optimize(self.objective, n_trials=number_of_trials)
        return study.best_trial.params

In [None]:
# Loss-function configurations.
#
# Each dict describes one loss. List values are search spaces consumed by
# MainOptuna.objective (3 items -> int range [low, high, step], 2 items ->
# float range [low, high], otherwise categorical choices); scalar values are
# used as-is. "flag_tosave" makes Main.sampling cache the samples on disk.
# Key order is kept as-is because the dicts are iterated in insertion order.

VERSE_PPR = {
    "Name": "VERSE_PPR",
    "C": "PPR",
    "num_negative_samples": [1, 6, 11, 16, 21],
    "loss var": "Context Matrix",
    "flag_tosave": False,
    "alpha": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    "Sampler": SamplerContextMatrix,
    "lmbda": [0.0, 1.0],
}
VERSE_Adj = {
    "Name": "VERSE_Adj",
    "C": "Adj",
    "num_negative_samples": [1, 6, 11, 16, 21],
    "loss var": "Context Matrix",
    "flag_tosave": False,
    "Sampler": SamplerContextMatrix,
    "lmbda": [0.0, 1.0],
}

VERSE_SR = {
    "Name": "VERSE_SimRank",
    "C": "SR",
    "num_negative_samples": [1, 6, 11, 16, 21],
    "loss var": "Context Matrix",
    "flag_tosave": False,
    "Sampler": SamplerContextMatrix,
    "lmbda": [0.0, 1.0],
}
# Memory problems appeared after the amount of training data was increased.
DeepWalk = {
    "Name": "DeepWalk",
    "walk_length": [5, 10, 15, 20],
    "walks_per_node": [5, 10, 15, 20],
    "num_negative_samples": [1, 6, 11, 16, 21],
    "context_size": [5, 10, 15, 20],
    "p": 1,
    "q": 1,
    "loss var": "Random Walks",
    "flag_tosave": False,
    "Sampler": SamplerRandomWalk,
}
# Same memory problems as DeepWalk.
Node2Vec = {
    "Name": "Node2Vec",
    "walk_length": [5, 10, 15, 20],
    "walks_per_node": [5, 10, 15, 20],
    "num_negative_samples": [1, 6, 11, 16, 21],
    "context_size": [5, 10, 15, 20],
    "p": [0.25, 0.50, 1, 2, 4],
    "q": [0.25, 0.50, 1, 2, 4],
    "loss var": "Random Walks",
    "flag_tosave": False,
    "Sampler": SamplerRandomWalk,
}
APP = {
    "Name": "APP",
    "C": "PPR",
    "num_negative_samples": [1, 6, 11, 16, 21],
    "loss var": "Context Matrix",
    "flag_tosave": True,
    "alpha": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    "Sampler": SamplerAPP,
}
# TODO: verify.
HOPE_Katz = {
    "Name": "HOPE_Katz",
    "C": "Katz",
    "loss var": "Factorization",
    "flag_tosave": True,
    "betta": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    "Sampler": SamplerFactorization,
    "lmbda": [0.0, 1.0],
}

# TODO: verify.
HOPE_RPR = {
    "Name": "HOPE_RPR",
    "C": "RPR",
    "loss var": "Factorization",
    "flag_tosave": True,
    "alpha": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    "Sampler": SamplerFactorization,
    "lmbda": [0.0, 1.0],
}
HOPE_CN = {
    "Name": "HOPE_CommonNeighbors",
    "C": "CN",
    "loss var": "Factorization",
    "flag_tosave": False,
    "Sampler": SamplerFactorization,
    "lmbda": [0.0, 1.0],
}
HOPE_AA = {
    "Name": "HOPE_AdamicAdar",
    "C": "AA",
    "loss var": "Factorization",
    "flag_tosave": True,
    "Sampler": SamplerFactorization,
    "lmbda": [0.0, 1.0],
}

LapEigen = {
    "Name": "LaplacianEigenMaps",
    "C": "Adj",
    "loss var": "Laplacian EigenMaps",
    "flag_tosave": True,
    "Sampler": SamplerFactorization,
    "lmbda": [0.0, 1.0],
}
LINE = {
    "Name": "LINE",
    "C": "Adj",
    "num_negative_samples": [1, 6, 11, 16, 21],
    "loss var": "Context Matrix",
    "flag_tosave": False,
    "Sampler": SamplerContextMatrix,
    "lmbda": [0.0, 1.0],
}
GraphFactorization = {
    "Name": "Graph Factorization",
    "C": "Adj",
    "loss var": "Factorization",
    "flag_tosave": False,
    "Sampler": SamplerFactorization,
    "lmbda": [0.0, 1.0],
}

Force2Vec = {
    "Name": "Force2Vec",
    "C": "Adj",
    "num_negative_samples": [1, 6, 11, 16, 21],
    "loss var": "Force2Vec",
    "flag_tosave": False,
    "Sampler": SamplerContextMatrix,
    "lmbda": [0.0, 1.0],
}


In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Force2Vec: tune with Optuna, retrain with the best parameters and record the
# F1 scores for every (dataset, conv) pair that is not in `analysis` yet.
loss = Force2Vec
loss_name = 'Force2Vec'
for name in datasets_names:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        loss_trgt = dict(loss)
        for par in ("num_negative_samples", "lmbda"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_force2vec.csv')


In [None]:
# VERSE (adjacency similarity): tune, retrain with the best parameters and
# record the F1 scores for every (dataset, conv) pair not in `analysis` yet.
loss = VERSE_Adj
loss_name = 'VERSE_Adj'

for name in datasets_names:
    for conv in ['GCN']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        loss_trgt = dict(loss)
        for par in ("num_negative_samples", "lmbda"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# VERSE (SimRank similarity): tune, retrain with the best parameters and
# record the F1 scores for every (dataset, conv) pair not in `analysis` yet.
loss = VERSE_SR
loss_name = 'VERSE_SR'

for name in datasets_names:
    for conv in ['GCN']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        loss_trgt = dict(loss)
        for par in ("num_negative_samples", "lmbda"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')

In [None]:
# VERSE (personalised PageRank similarity): tune, retrain with the best
# parameters and record the F1 scores for every missing (dataset, conv) pair.
loss = VERSE_PPR
loss_name = 'VERSE_PPR'

for name in datasets_names:
    for conv in ['GCN']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        loss_trgt = dict(loss)
        for par in ("num_negative_samples", "alpha", "lmbda"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')


In [None]:
# Laplacian EigenMaps: tune, retrain with the best parameters and record the
# F1 scores for every (dataset, conv) pair that is not in `analysis` yet.
loss = LapEigen
loss_name = 'LapEigen'

for name in datasets_names:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search space with the tuned value.
        loss_trgt = dict(loss)
        loss_trgt["lmbda"] = best_values['lmbda']

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# LINE: tune, retrain with the best parameters and record the F1 scores for
# every (dataset, conv) pair that is not in `analysis` yet.
loss = LINE
loss_name = 'LINE'

for name in datasets_names:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        loss_trgt = dict(loss)
        for par in ("num_negative_samples", "lmbda"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# Graph Factorization: tune, retrain with the best parameters and record the
# F1 scores for every (dataset, conv) pair that is not in `analysis` yet.
loss = GraphFactorization
loss_name = 'GraphFactorization'

for name in datasets_names:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search space with the tuned value.
        loss_trgt = dict(loss)
        loss_trgt["lmbda"] = best_values['lmbda']

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# HOPE (common neighbours): tune, retrain with the best parameters and record
# the F1 scores for every (dataset, conv) pair that is not in `analysis` yet.
loss = HOPE_CN
loss_name = 'HOPE_CN'

for name in datasets_names:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search space with the tuned value.
        loss_trgt = dict(loss)
        loss_trgt["lmbda"] = best_values['lmbda']

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# HOPE (Adamic-Adar): tune, retrain with the best parameters and record the
# F1 scores for every (dataset, conv) pair that is not in `analysis` yet.
loss = HOPE_AA
loss_name = 'HOPE_AA'

for name in datasets_names:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search space with the tuned value.
        loss_trgt = dict(loss)
        loss_trgt["lmbda"] = best_values['lmbda']

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# HOPE (RPR similarity): tune, retrain with the best parameters and record the
# F1 scores for every (dataset, conv) pair that is not in `analysis` yet.
loss = HOPE_RPR
loss_name = 'HOPE_RPR'

for name in datasets_names:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        loss_trgt = dict(loss)
        for par in ("alpha", "lmbda"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# HOPE (Katz similarity): tune, retrain with the best parameters and record
# the F1 scores for every (dataset, conv) pair that is not in `analysis` yet.
loss = HOPE_Katz
loss_name = 'HOPE_Katz'

for name in datasets_names:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        loss_trgt = dict(loss)
        for par in ("betta", "lmbda"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# Node2Vec on 'chameleon' (run on the CPU): tune, retrain with the best
# parameters and record the F1 scores if the combination is not in `analysis`.
loss = Node2Vec
loss_name = 'Node2Vec'
# Rebinds the notebook-wide `device`; later cells will also run on the CPU.
device = 'cpu'
for name in ['chameleon']:
    for conv in ['GCN']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        # "walks_per_node" now takes its own tuned value; it used to be
        # overwritten with best_values['walk_length'].
        loss_trgt = dict(loss)
        for par in ("walks_per_node", "walk_length", "num_negative_samples", "context_size", "p", "q"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# DeepWalk on 'Citeseer' (run on the CPU): tune, retrain with the best
# parameters and record the F1 scores for every conv not in `analysis` yet.
loss = DeepWalk
loss_name = 'DeepWalk'
# Rebinds the notebook-wide `device`; later cells will also run on the CPU.
device = 'cpu'
for name in ['Citeseer']:
    for conv in ['GCN', 'GAT', 'SAGE']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        # "walks_per_node" now takes its own tuned value; it used to be
        # overwritten with best_values['walk_length'].
        loss_trgt = dict(loss)
        for par in ("walks_per_node", "walk_length", "num_negative_samples", "context_size"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                

In [None]:
# APP: tune, retrain with the best parameters and record the F1 scores for
# every (dataset, conv) pair that is not in `analysis` yet.
loss = APP
loss_name = 'APP'

for name in datasets_names:
    for conv in ['GCN']:
        already_done = ((analysis['loss'] == loss_name) & (analysis['conv'] == conv) & (analysis['dataset'] == name)).any()
        if already_done:
            continue

        MO = MainOptuna(name=name, conv=conv, device=device, loss_function=loss, mode='unsupervised')
        best_values = MO.run(number_of_trials=500)

        # Copy the loss spec and replace the search spaces with the tuned values.
        loss_trgt = dict(loss)
        for par in ("num_negative_samples", "alpha"):
            loss_trgt[par] = best_values[par]

        M = Main(name=name, conv=conv, device=device, loss_function=loss_trgt, mode='unsupervised')
        train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma = M.run(best_values)

        # DataFrame.append was removed in pandas 2.0; build a one-row frame
        # and concatenate instead.
        to_append = pd.DataFrame(
            [[loss_name, conv, name, train_acc_mi, test_acc_mi, train_acc_ma, test_acc_ma]],
            columns=analysis.columns,
        )
        analysis = pd.concat([analysis, to_append], ignore_index=True)
        # Saved after every run so partial results survive an interruption.
        analysis.to_csv('data_analysis_realdata.csv')
                