## LFR generation

In [None]:
from networkit import *
import pickle
import networkx as nx
from networkx.generators.community import LFR_benchmark_graph
from datetime import datetime

def nx2nkit(g_nx):
    """Build a networkit graph that mirrors a networkx graph.

    Nodes are created with ids ``0 .. n-1`` and every edge reported by
    ``g_nx.edges()`` is added using its endpoints directly as networkit node
    ids, so ``g_nx`` must already be labelled with those integers.

    NOTE(review): the networkit graph is always created as directed, even if
    ``g_nx`` is undirected; each undirected edge then becomes a single arc in
    whatever orientation networkx reports it -- confirm this is intended.

    Raises:
        AssertionError: if the node or edge counts differ after the copy.
    """
    node_total = g_nx.number_of_nodes()

    # Passing the node count to the constructor creates nodes 0 .. n-1.
    g_nkit = Graph(node_total, directed=True)

    for src, dst in g_nx.edges():
        g_nkit.addEdge(src, dst)

    assert g_nkit.numberOfNodes() == g_nx.number_of_nodes(), "Number of nodes not matching"
    assert g_nkit.numberOfEdges() == g_nx.number_of_edges(), "Number of edges not matching"

    return g_nkit

def cal_exact_bet(g_nkit):
    """Return the exact normalized betweenness of every node as a dict.

    Runs networkit's ``centrality.Betweenness`` with ``normalized=True`` and
    turns the entries of its ``ranking()`` into ``{entry[0]: entry[1]}``.
    """
    ranking = centrality.Betweenness(g_nkit, normalized=True).run().ranking()
    return {entry[0]: entry[1] for entry in ranking}


def generate_bet_LFR_data(num_of_graphs,output_path):
    """Generate LFR benchmark graphs with exact betweenness and pickle them.

    For each of ``num_of_graphs`` graphs: sample an LFR graph (retrying while
    the generator reports that it ran out of iterations), drop isolated
    nodes, relabel the nodes to consecutive integers, compute the exact
    betweenness with networkit and append ``[g_nx, bet_dict]`` to the result
    list. The list collected so far is re-written to
    ``"custom/" + output_path`` after every graph, so an interrupted run
    keeps the graphs that were already finished.

    Args:
        num_of_graphs: number of graphs to generate.
        output_path: pickle path, relative to the ``custom/`` directory.
    """
    list_bet_data = list()

    for i in range(num_of_graphs):

        while True:
            print(f"Graph index:{i+1}/{num_of_graphs}, Time: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
            try:
                g_nx = LFR_benchmark_graph(n=10000,tau1=3,tau2=1.5,mu=0.05,average_degree=6,min_community=20)
            except nx.ExceededMaxIterations:
                # Random generation did not converge: try again. Only this
                # failure is retried, so Ctrl-C and genuine errors (which a
                # bare ``except`` would swallow forever) still propagate.
                continue
            break

        print("removing isolates")

        if nx.number_of_isolates(g_nx)>0:
            g_nx.remove_nodes_from(list(nx.isolates(g_nx)))

        # nx2nkit uses node labels as networkit ids, so make them 0 .. n-1.
        g_nx = nx.convert_node_labels_to_integers(g_nx)
        g_nkit = nx2nkit(g_nx)
        bet_dict = cal_exact_bet(g_nkit)
        list_bet_data.append([g_nx,bet_dict])

        # Checkpoint: overwrite the pickle with everything generated so far.
        with open("custom/"+output_path,"wb") as fopen:
            pickle.dump(list_bet_data,fopen)


# Generate the LFR graphs with their exact betweenness and pickle them.
# generate_bet_LFR_data prepends "custom/" to output_path, so the file is
# written to custom/graphs/LFR_<num_of_graphs>_graphs.pickle.
num_of_graphs = 15

output_path = f"graphs/LFR_{num_of_graphs}_graphs.pickle"

generate_bet_LFR_data(num_of_graphs,output_path)

## create datasets LFR

In [None]:
from utils import *
# Seed Python's RNG before building the datasets
# (presumably create_dataset draws from `random` -- TODO confirm).
random.seed(1)

# "size" values are forwarded to create_dataset as adj_size and "num_copies"
# values as num_copies; "num_train"/"num_test" say how many of the pickled
# graphs go to each split.
param = {
    "size": [10000, 100000, 300000, 900000],
    "num_train": 5,
    "num_test": 10,
    "num_copies": [1, 10, 20, 40],
}

with open("./graphs/LFR_15_graphs.pickle", "rb") as fopen:
    list_data = pickle.load(fopen)

num_graph = len(list_data)
assert num_graph == param["num_train"] + param["num_test"], "Required split size doesn't match number of graphs in pickle file."

# Training split: one pickle per (adjacency size, number of copies) pair,
# built from the first num_train graphs.
for size in param["size"]:
    for c in param["num_copies"]:
        if param["num_train"] <= 0:
            continue

        train_graphs = list_data[:param["num_train"]]
        list_graph, list_n_sequence, list_node_num, cent_mat = create_dataset(
            train_graphs, num_copies=c, adj_size=size
        )

        train_pickle = f"./data_splits/train/LFR_5_graphs_{c}_copies_{size}_size.pickle"
        with open(train_pickle, "wb") as fopen:
            pickle.dump([list_graph, list_n_sequence, list_node_num, cent_mat], fopen)

# Test split: the graphs after the training ones, a single copy, built only
# for the first configured adjacency size.
size = param["size"][0]
if param["num_test"] > 0:
    test_start = param["num_train"]
    test_stop = test_start + param["num_test"]
    list_graph, list_n_sequence, list_node_num, cent_mat = create_dataset(
        list_data[test_start:test_stop], num_copies=1, adj_size=size
    )

    test_pickle = f"./data_splits/test/LFR_10_graphs_{size}_size.pickle"
    with open(test_pickle, "wb") as fopen:
        pickle.dump([list_graph, list_n_sequence, list_node_num, cent_mat], fopen)

## create datasets scale free

In [None]:
from utils import *
# Seed Python's RNG before building the datasets
# (presumably create_dataset draws from `random` -- TODO confirm).
random.seed(1)

# Same grid as the LFR datasets, but every pickled scale-free graph is used
# for training ("num_test" is 0, so no test split is written).
param = {
    "size": [10000, 100000, 300000, 900000],
    "num_train": 5,
    "num_test": 0,
    "num_copies": [1, 10, 20, 40],
}

with open("./graphs/SF_5_graphs_10000_nodes.pickle", "rb") as fopen:
    list_data = pickle.load(fopen)

num_graph = len(list_data)
assert num_graph == param["num_train"] + param["num_test"], "Required split size doesn't match number of graphs in pickle file."

# Training split: one pickle per (adjacency size, number of copies) pair.
for size in param["size"]:
    for c in param["num_copies"]:
        if param["num_train"] <= 0:
            continue

        train_graphs = list_data[:param["num_train"]]
        list_graph, list_n_sequence, list_node_num, cent_mat = create_dataset(
            train_graphs, num_copies=c, adj_size=size
        )

        train_pickle = f"./data_splits/train/SF_5_graphs_10000_nodes_{c}_copies_{size}_size.pickle"
        with open(train_pickle, "wb") as fopen:
            pickle.dump([list_graph, list_n_sequence, list_node_num, cent_mat], fopen)


## lfr train models

In [None]:
 
from utils import *
from model_bet import *
import argparse
torch.manual_seed(15)


# Grid of runs: one model is trained per (adjacency size, number of copies).
param = {
    "size" : [10000,100000,300000,900000],
    "num_copies": [1,10,20,40]
}

#data_test = f"LFR_10_graphs_10000_size.pickle"
##Load test data
#with open("./data_splits/test/"+data_test,"rb") as fopen:
#    list_graph_test,list_n_seq_test,list_num_node_test,bc_mat_test = pickle.load(fopen)
#
#list_adj_test,list_adj_t_test = graph_to_adj_bet(list_graph_test,list_n_seq_test,list_num_node_test,size)

# Settings shared by every run (hoisted out of the loops: they never change).
hidden = 20
num_epoch = 15
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for size in param["size"]:
    for c in param["num_copies"]:

        # Re-seed so every configuration starts from the same RNG state.
        torch.manual_seed(15)
        data_train = f"LFR_5_graphs_{c}_copies_{size}_size.pickle"

        #Load training data
        print(f"Loading data...")
        with open("./data_splits/train/"+data_train,"rb") as fopen:
            list_graph_train,list_n_seq_train,list_num_node_train,bc_mat_train = pickle.load(fopen)

        list_adj_train,list_adj_t_train = graph_to_adj_bet(list_graph_train,list_n_seq_train,list_num_node_train,size)

        model = GNN_Bet(ninput=size,nhid=hidden,dropout=0.6)
        model.to(device)

        optimizer = torch.optim.Adam(model.parameters(),lr=0.0005)

        for e in range(num_epoch):
            print(f"{c}_copies_{size}_size_{e}_epoch_{datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
            # Hand the model, device, optimizer and size to train explicitly,
            # exactly as every other train(...) call in this file does; the
            # bare 4-argument call left them unspecified.
            train(list_adj_train,list_adj_t_train,list_num_node_train,bc_mat_train,model=model,device=device,optimizer=optimizer,size=size)

            # Keep one checkpoint per epoch.
            saving_path = f"./models/LFR/LFR_5_graphs_{c}_copies_{size}_size_{e}_epoch"
            torch.save(model.state_dict(), saving_path)

## lfr performance

In [None]:

 
from utils import *
from model_bet import *
import argparse
import pandas as pd
torch.manual_seed(15)


size = 10000

# Settings shared by every evaluated checkpoint.
hidden = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Column-oriented results; one entry per evaluated (copies, epoch) checkpoint.
Results = {"graph":[],
            "size": [],
            "copies":[],
            "epochs": [],
            "kendalltau":[],
            "avg":[]}

data_path = f'LFR_10_graphs_10000_size.pickle'

#Load test data
with open("./data_splits/test/"+data_path,"rb") as fopen:
    list_graph_test,list_n_seq_test,list_num_node_test,bc_mat_test = pickle.load(fopen)

#Get adjacency matrices from graphs
list_adj_test,list_adj_t_test = graph_to_adj_bet(list_graph_test,list_n_seq_test,list_num_node_test,size)


for c in [1,10,20,40]:
    for e in range(15):

        print(f"copies: {c}, epoch {e}")

        model_path = f"./models/LFR/LFR_5_graphs_{c}_copies_{size}_size_{e}_epoch"

        model = GNN_Bet(ninput=size,nhid=hidden,dropout=0.6)

        # map_location lets a checkpoint saved on a GPU be loaded on a
        # CPU-only machine (and vice versa) instead of raising.
        model.load_state_dict(torch.load(model_path, map_location=device))

        model.to(device)

        with torch.no_grad():
            r = test(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,model=model,device=device,size=size)

        Results["graph"].append("LFR_10_graphs_10000_size")
        Results["size"].append(size)
        Results["copies"].append(c)
        Results["epochs"].append(e)
        Results["kendalltau"].append(r["kt"])
        Results["avg"].append(r["avg"])

        # Rewrite the CSV after every checkpoint so partial results survive
        # an interrupted run. NOTE(review): the file name is spelled
        # "peformance"; kept as-is in case other tooling reads this path.
        df = pd.DataFrame.from_dict(Results)
        df.to_csv("output_LFR_graphs_peformance.csv")


## lfr performance real graphs

In [None]:
from utils import *
from model_bet import *
import argparse
import pandas as pd


graphs = ['1-wiki-Vote', '2-soc-Epinions', '3-email-EuAll', '4-web-Google']
sizes = [10000, 100000, 300000, 900000]

# Results[test file] = {'true': ..., 'pred': [one entry per (copies, seed) run]}
Results = {}

# graphs[k] is evaluated with adjacency size sizes[k].
for g, size in zip(graphs, sizes):

    data_test = f'{g}_{size}_size.pickle'
    graph_results = {'true': [], 'pred': []}
    Results[data_test] = graph_results

    # Load the test data of this real graph.
    with open("./data_splits/test/" + data_test, "rb") as fopen:
        list_graph_test, list_n_seq_test, list_num_node_test, bc_mat_test = pickle.load(fopen)
    list_adj_test, list_adj_t_test = graph_to_adj_bet(list_graph_test, list_n_seq_test, list_num_node_test, size)

    for c in [1, 10, 20, 40]:

        data_train = f"LFR_5_graphs_{c}_copies_{size}_size.pickle"

        # Load the matching LFR training split and convert it to adjacency form.
        with open("./data_splits/train/" + data_train, "rb") as fopen:
            list_graph_train, list_n_seq_train, list_num_node_train, bc_mat_train = pickle.load(fopen)
        list_adj_train, list_adj_t_train = graph_to_adj_bet(list_graph_train, list_n_seq_train, list_num_node_train, size)

        for seed in range(10):

            # A freshly seeded model is trained for every run.
            torch.manual_seed(seed)
            print(f"G:{g}, size: {size}, copies: {c}, seed {seed}, Time: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")

            hidden = 20
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model = GNN_Bet(ninput=size, nhid=hidden, dropout=0.6)
            model.to(device)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

            # Disabled: evaluation of the untrained model.
#            with torch.no_grad():
#                r = test_onegraph(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,model=model,device=device,size=size)
#
#            currentresult['no_train'] = {'pred':r['pred'],'kt':r["kt"]}

            train(list_adj_train, list_adj_t_train, list_num_node_train, bc_mat_train,
                  model=model, device=device, optimizer=optimizer, size=size)

            with torch.no_grad():
                r = test_onegraph(list_adj_test, list_adj_t_test, list_num_node_test, bc_mat_test,
                                  model=model, device=device, size=size)

            currentresult = {
                'data_train': data_train,
                'seed': seed,
                'copies': c,
                'train': {'pred': r['pred'], 'kt': r["kt"]},
            }

            # The ground truth is stored once per test graph.
            if len(graph_results['true']) == 0:
                graph_results['true'] = r['true']

            graph_results['pred'].append(currentresult)

            # Checkpoint everything gathered so far after every run.
            with open("LFR_real_performance.pickle", "wb") as fopen:
                pickle.dump(Results, fopen)


## sf performance real graphs

In [None]:
from utils import *
from model_bet import *
import argparse
import pandas as pd


graphs = ['1-wiki-Vote','2-soc-Epinions','3-email-EuAll','4-web-Google']
sizes = [10000,100000,300000,900000]
# Number of training passes per model; also encoded in the output file name.
epochs = 5

# Results[test file] = {'true': ..., 'pred': [one entry per (copies, seed) run]}
Results = {}

# graphs[k] is evaluated with adjacency size sizes[k].
for g, size in zip(graphs, sizes):

    data_test = f'{g}_{size}_size.pickle'

    Results[data_test] = {'true': [],'pred': []}
    #Load test data
    with open("./data_splits/test/"+data_test,"rb") as fopen:
        list_graph_test,list_n_seq_test,list_num_node_test,bc_mat_test = pickle.load(fopen)
    list_adj_test,list_adj_t_test = graph_to_adj_bet(list_graph_test,list_n_seq_test,list_num_node_test,size)

    for c in [1,10,20,40]:

        data_train = f"SF_5_graphs_10000_nodes_{c}_copies_{size}_size.pickle"

        #Load training data
        with open("./data_splits/train/"+data_train,"rb") as fopen:
            list_graph_train,list_n_seq_train,list_num_node_train,bc_mat_train = pickle.load(fopen)

        #Get adjacency matrices from graphs
        list_adj_train,list_adj_t_train = graph_to_adj_bet(list_graph_train,list_n_seq_train,list_num_node_train,size)

        for seed in range(10):

            currentresult = {'data_train': data_train, 'seed':seed, 'copies': c}

            torch.manual_seed(seed)
            print(f"G:{g}, size: {size}, copies: {c}, seed {seed}, Time: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")

            hidden = 20
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model = GNN_Bet(ninput=size,nhid=hidden,dropout=0.6)
            model.to(device)
            optimizer = torch.optim.Adam(model.parameters(),lr=0.0005)

#            with torch.no_grad():
#                r = test_onegraph(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,model=model,device=device,size=size)
#
#            currentresult['no_train'] = {'pred':r['pred'],'kt':r["kt"]}

            # Train for `epochs` passes so the run really matches the epoch
            # count written into the output file name; a single train(...)
            # call here would produce a file labelled "5_epochs" from one pass.
            for _ in range(epochs):
                train(list_adj_train,list_adj_t_train,list_num_node_train,bc_mat_train,model=model,device=device,optimizer=optimizer,size=size)

            with torch.no_grad():
                r = test_onegraph(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,model=model,device=device,size=size)

            currentresult['train'] = {'pred':r['pred'],'kt':r["kt"]}

            # The ground truth is stored once per test graph.
            if len(Results[data_test]['true']) == 0:
                Results[data_test]['true'] = r['true']

            Results[data_test]['pred'].append(currentresult)

            # Checkpoint everything gathered so far after every run.
            with open(f"SF_real_performance_{epochs}_epochs.pickle","wb") as fopen:
                pickle.dump(Results,fopen)


## results analysis

In [None]:
import pickle
import numpy as np

In [None]:
# Summarise the Kendall-tau scores stored per real graph: mean and standard
# deviation for every number of copies that has exactly 10 runs.
for g in ['LFR']:

    with open(f"{g}_real_performance.pickle", 'rb') as f:
        data = pickle.load(f)

    for net, net_results in data.items():

        # Kendall-tau values grouped by number of training copies.
        d = {copies: [] for copies in (1, 10, 20, 40)}
        for p in net_results['pred']:
            d[p['copies']].append(p['train']['kt'])

        for c, kts in d.items():
            # Skip configurations that do not have all 10 runs.
            if len(kts) != 10:
                continue
            kt_values = np.array(kts)
            print(f"Graph: {net} training: {g} Copies: {c} data: {len(kts)}")
            print(round(np.mean(kt_values), 4), round(np.std(kt_values), 4))
            print(kts)

In [None]:
# Summarise the Kendall-tau scores stored per real graph: mean and standard
# deviation for every number of copies that has exactly 10 runs.
# NOTE(review): this reads "SF_real_performance.pickle", whereas the SF
# evaluation cell in this file writes "SF_real_performance_<epochs>_epochs.pickle"
# -- confirm which file is meant.
for g in ['SF']:

    with open(f"{g}_real_performance.pickle", 'rb') as f:
        data = pickle.load(f)

    for net, net_results in data.items():

        # Kendall-tau values grouped by number of training copies.
        d = {copies: [] for copies in (1, 10, 20, 40)}
        for p in net_results['pred']:
            d[p['copies']].append(p['train']['kt'])

        for c, kts in d.items():
            # Skip configurations that do not have all 10 runs.
            if len(kts) != 10:
                continue
            kt_values = np.array(kts)
            print(f"Graph: {net} training: {g} Copies: {c} data: {len(kts)}")
            print(round(np.mean(kt_values), 4), round(np.std(kt_values), 4))

## generation models parallelaux

In [None]:
from utils import *
from model_bet import *
import argparse
import pandas as pd
import multiprocessing as mp

def paral_func(size,copies,seed,list_adj_train,list_adj_t_train,list_num_node_train,bc_mat_train, data_train):
    """Train one seeded GNN_Bet model and save its final state dict.

    Seeds torch with ``seed``, builds a ``GNN_Bet`` with ``ninput=size``,
    calls ``train`` once per epoch for 5 epochs and then saves the weights to
    ``models/{data_train}_{seed}_seed_{last epoch index}_epoch``. Progress is
    printed with timestamps. ``copies`` is only used in the log message.
    """
    def timestamp():
        return datetime.now().strftime('%d/%m/%Y %H:%M:%S')

    print(f"Starting: size:{size}, copies: {copies},seed: {seed}, Time: {timestamp()}")

    torch.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    hidden = 20
    model = GNN_Bet(ninput=size, nhid=hidden, dropout=0.6)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

    epochs = 5  # epochs = 15
    for e in range(epochs):
        train(list_adj_train, list_adj_t_train, list_num_node_train, bc_mat_train,
              model=model, device=device, optimizer=optimizer, size=size)
        print(f"Computed {data_train}_{seed}_seed_{e}_epoch, Time: {timestamp()}")

    # Only the model after the last epoch is saved; `e` still holds the index
    # of that epoch here.
    saving_path = f'models/{data_train}_{seed}_seed_{e}_epoch'
    torch.save(model.state_dict(), saving_path)

    print(f"Finished {saving_path}, Time: {timestamp()}")


if __name__ == "__main__":

    # Full list of real graphs and their adjacency sizes ...
    graphs = ['1-wiki-Vote', '2-soc-Epinions', '3-email-EuAll', '4-web-Google']
    sizes = [10000, 100000, 300000, 900000]

    # ... narrowed to the subset handled by this run.
    graphs = ['2-soc-Epinions', '3-email-EuAll', '4-web-Google']  # graphs = ['1-wiki-Vote']
    sizes = [100000, 300000, 900000]  # sizes = [10000]

    # Only the size paired with each graph is used below.
    for _, size in zip(graphs, sizes):

        for c in [1, 10, 20, 40]:

            LFR_data_train = f"LFR_5_graphs_{c}_copies_{size}_size.pickle"
            SF_data_train = f"SF_5_graphs_10000_nodes_{c}_copies_{size}_size.pickle"

            # This script trains on the LFR split.
            data_train = LFR_data_train

            # Load the training split and convert it to adjacency form.
            with open("./data_splits/train/" + data_train, "rb") as fopen:
                list_graph_train, list_n_seq_train, list_num_node_train, bc_mat_train = pickle.load(fopen)
            list_adj_train, list_adj_t_train = graph_to_adj_bet(list_graph_train, list_n_seq_train, list_num_node_train, size)

            print(f"Starting: Size: {size}, copies: {c}, Time: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")

            processes = []
            seeds = 16

            # Seeds are trained four at a time, one process per seed.
            for batch in range(seeds // 4):
                first_seed = batch * 4
                for seed in range(first_seed, first_seed + 4):
                    # data_train[:-7] drops the ".pickle" suffix for the model name.
                    worker_args = (size, c, seed, list_adj_train, list_adj_t_train,
                                   list_num_node_train, bc_mat_train, data_train[:-7])
                    p = mp.Process(target=paral_func, args=worker_args)
                    p.start()
                    processes.append(p)

                # Wait for the batch before starting the next one; the list
                # also still holds workers of earlier batches, which have
                # already been joined once.
                for process in processes:
                    process.join()

            print(f"Finished: Size: {size}, copies: {c}, Time: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")

## generation models parallel

In [None]:
from utils import *
from model_bet import *
import argparse
import pandas as pd
import multiprocessing as mp

def paral_func(size,copies,seed,list_adj_train,list_adj_t_train,list_num_node_train,bc_mat_train, data_train):
    """Train one seeded GNN_Bet model, saving a checkpoint after every epoch.

    Seeds torch with ``seed``, builds a ``GNN_Bet`` with ``ninput=size`` and
    calls ``train`` once per epoch for 15 epochs. After each epoch the state
    dict is written to ``models/{data_train}_{seed}_seed_{epoch}_epoch`` and a
    timestamped line is printed. ``copies`` is only used in the log message.
    """
    def timestamp():
        return datetime.now().strftime('%d/%m/%Y %H:%M:%S')

    print(f"Starting: size:{size}, copies: {copies},seed: {seed}, Time: {timestamp()}")

    torch.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    hidden = 20
    model = GNN_Bet(ninput=size, nhid=hidden, dropout=0.6)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

    epochs = 15
    for e in range(epochs):
        train(list_adj_train, list_adj_t_train, list_num_node_train, bc_mat_train,
              model=model, device=device, optimizer=optimizer, size=size)

        # One checkpoint per epoch.
        saving_path = f'models/{data_train}_{seed}_seed_{e}_epoch'
        torch.save(model.state_dict(), saving_path)

        print(f"Finished {saving_path}, Time: {timestamp()}")


if __name__ == "__main__":

    # Full list of real graphs and their adjacency sizes ...
    graphs = ['1-wiki-Vote', '2-soc-Epinions', '3-email-EuAll', '4-web-Google']
    sizes = [10000, 100000, 300000, 900000]

    # ... narrowed to the subset handled by this run.
    graphs = ['1-wiki-Vote']
    sizes = [10000]

    # Only the size paired with each graph is used below.
    for _, size in zip(graphs, sizes):

        for c in [1, 10, 20, 40]:

            LFR_data_train = f"LFR_5_graphs_{c}_copies_{size}_size.pickle"
            SF_data_train = f"SF_5_graphs_10000_nodes_{c}_copies_{size}_size.pickle"

            # This script trains on the scale-free split.
            data_train = SF_data_train

            # Load the training split and convert it to adjacency form.
            with open("./data_splits/train/" + data_train, "rb") as fopen:
                list_graph_train, list_n_seq_train, list_num_node_train, bc_mat_train = pickle.load(fopen)
            list_adj_train, list_adj_t_train = graph_to_adj_bet(list_graph_train, list_n_seq_train, list_num_node_train, size)

            print(f"Starting: Size: {size}, copies: {c}, Time: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")

            processes = []
            seeds = 16

            # Seeds are trained four at a time, one process per seed.
            for batch in range(seeds // 4):
                first_seed = batch * 4
                for seed in range(first_seed, first_seed + 4):
                    # data_train[:-7] drops the ".pickle" suffix for the model name.
                    worker_args = (size, c, seed, list_adj_train, list_adj_t_train,
                                   list_num_node_train, bc_mat_train, data_train[:-7])
                    p = mp.Process(target=paral_func, args=worker_args)
                    p.start()
                    processes.append(p)

                # Wait for the batch before starting the next one; the list
                # also still holds workers of earlier batches, which have
                # already been joined once.
                for process in processes:
                    process.join()

            print(f"Finished: Size: {size}, copies: {c}, Time: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")

## read models generate results 2

In [None]:
from utils import *
from model_bet import *
import argparse
import pandas as pd


graphs = ['1-wiki-Vote','2-soc-Epinions','3-email-EuAll','4-web-Google']
sizes = [10000,100000,300000,900000]

# Narrowed to the graph handled by this run.
graphs = ['2-soc-Epinions']
sizes = [100000]

# Settings shared by every evaluated checkpoint.
hidden = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Results[family][size][copies]['pred'][epoch][seed] = {'pred': ..., 'kt': ...}
# Results[family][size]['real'] holds the ground truth of the test graph.
Results = {'LFR': {}, 'SF': {}}

for g, size in zip(graphs, sizes):

    data_test = f'{g}_{size}_size.pickle'

    #Load test data
    with open("./data_splits/test/"+data_test,"rb") as fopen:
        list_graph_test,list_n_seq_test,list_num_node_test,bc_mat_test = pickle.load(fopen)

    list_adj_test,list_adj_t_test = graph_to_adj_bet(list_graph_test,list_n_seq_test,list_num_node_test,size)

    size_key = f"{size}_size"
    for family in ('LFR', 'SF'):
        Results[family][size_key] = {'test_graph': data_test, 'real': []}

    for c in [1,10,20,40]:

        LFR_data_train = f"LFR_5_graphs_{c}_copies_{size}_size.pickle"
        SF_data_train = f"SF_5_graphs_10000_nodes_{c}_copies_{size}_size.pickle"
        # Evaluated in this order for every seed: LFR first, then SF.
        train_files = {'LFR': LFR_data_train, 'SF': SF_data_train}

        copies_key = f"{c}_copies"
        for family, data_train in train_files.items():
            Results[family][size_key][copies_key] = {'data_train' : data_train,'pred':{}}

        for epoch in [4]:
            # Only the checkpoint after the 5th epoch (index 4) is analysed.
            epoch_key = f'{epoch}_epoch'
            for family in train_files:
                Results[family][size_key][copies_key]['pred'][epoch_key] = {}

            for seed in range(15):
                for family, data_train in train_files.items():

                    # data_train[:-7] drops the ".pickle" suffix.
                    model_path = f'{data_train[:-7]}_{seed}_seed_{epoch}_epoch'
                    # Progress line, printed for every checkpoint.
                    print(model_path)
                    torch.manual_seed(seed)
                    model = GNN_Bet(ninput=size,nhid=hidden,dropout=0.6)
                    # map_location lets a checkpoint saved on a GPU be loaded
                    # on a CPU-only machine instead of raising.
                    model.load_state_dict(torch.load(f'models/{model_path}', map_location=device))
                    model.to(device)
                    with torch.no_grad():
                        r = test_onegraph(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,model=model,device=device,size=size)

                    Results[family][size_key][copies_key]['pred'][epoch_key][f"{seed}_seed"] = {'pred':r['pred'],'kt':r["kt"]}

                    # The ground truth is stored once per family and size.
                    if len(Results[family][size_key]['real']) == 0:
                        Results[family][size_key]['real'] = r['true']

                    # Checkpoint: each family's file receives the whole
                    # Results dict (both families) gathered so far.
                    with open(f"{family}_real_performance_full_{g}_5_epochs.pickle","wb") as fopen:
                        pickle.dump(Results,fopen)


## read models generate results

In [None]:
from utils import *
from model_bet import *
import argparse
import pandas as pd


graphs = ['1-wiki-Vote','2-soc-Epinions','3-email-EuAll','4-web-Google']
sizes = [10000,100000,300000,900000]

# Narrowed to the graph handled by this run.
graphs = ['1-wiki-Vote']
sizes = [10000]

# Settings shared by every evaluated checkpoint.
hidden = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Results[family][size][copies]['pred'][epoch][seed] = {'pred': ..., 'kt': ...}
# Results[family][size]['real'] holds the ground truth of the test graph.
Results = {'LFR': {}, 'SF': {}}

for g, size in zip(graphs, sizes):

    data_test = f'{g}_{size}_size.pickle'

    #Load test data
    with open("./data_splits/test/"+data_test,"rb") as fopen:
        list_graph_test,list_n_seq_test,list_num_node_test,bc_mat_test = pickle.load(fopen)

    list_adj_test,list_adj_t_test = graph_to_adj_bet(list_graph_test,list_n_seq_test,list_num_node_test,size)

    size_key = f"{size}_size"
    for family in ('LFR', 'SF'):
        Results[family][size_key] = {'test_graph': data_test, 'real': []}

    for c in [1,10,20,40]:

        LFR_data_train = f"LFR_5_graphs_{c}_copies_{size}_size.pickle"
        SF_data_train = f"SF_5_graphs_10000_nodes_{c}_copies_{size}_size.pickle"
        # Evaluated in this order for every seed: LFR first, then SF.
        train_files = {'LFR': LFR_data_train, 'SF': SF_data_train}

        copies_key = f"{c}_copies"
        for family, data_train in train_files.items():
            Results[family][size_key][copies_key] = {'data_train' : data_train,'pred':{}}

        for epoch in range(15):

            epoch_key = f'{epoch}_epoch'
            for family in train_files:
                Results[family][size_key][copies_key]['pred'][epoch_key] = {}

            for seed in range(15):
                for family, data_train in train_files.items():

                    # data_train[:-7] drops the ".pickle" suffix.
                    model_path = f'{data_train[:-7]}_{seed}_seed_{epoch}_epoch'
                    # Progress line, printed for every checkpoint.
                    print(model_path)
                    torch.manual_seed(seed)
                    model = GNN_Bet(ninput=size,nhid=hidden,dropout=0.6)
                    # map_location lets a checkpoint saved on a GPU be loaded
                    # on a CPU-only machine instead of raising.
                    model.load_state_dict(torch.load(f'models/{model_path}', map_location=device))
                    model.to(device)
                    with torch.no_grad():
                        r = test_onegraph(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,model=model,device=device,size=size)

                    Results[family][size_key][copies_key]['pred'][epoch_key][f"{seed}_seed"] = {'pred':r['pred'],'kt':r["kt"]}

                    # The ground truth is stored once per family and size.
                    # 'real' starts as an empty list, so emptiness is the
                    # right "not stored yet" test for both families; comparing
                    # it with -1 is never true and left SF without its truth.
                    if len(Results[family][size_key]['real']) == 0:
                        Results[family][size_key]['real'] = r['true']

                    # Checkpoint: each family's file receives the whole
                    # Results dict (both families) gathered so far.
                    with open(f"{family}_real_performance_full_{g}.pickle","wb") as fopen:
                        pickle.dump(Results,fopen)


## read generated full results 2

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
realgraph = '1-wiki-Vote'
size = 10000

# The result files do not change between epochs, so read each one only once.
data_by_type = {}
for graphtype in ['LFR','SF']:
    with open(f"{graphtype}_real_performance_full_{realgraph}.pickle",'rb') as f:
        data_by_type[graphtype] = pickle.load(f)

# For every epoch print one LaTeX table row per training family:
# "<family> <graph> & mean \pm std" for each number of copies, where mean and
# std are taken over the Kendall-tau scores of seeds 0..9.
for epoch in range(10):
    print(epoch)

    for graphtype in ['LFR','SF']:

        data = data_by_type[graphtype]

        line = graphtype+' '+realgraph
        for copies in [1,10,20,40]:
            runs = data[graphtype][f'{size}_size'][f'{copies}_copies']['pred'][f'{epoch}_epoch']
            aux = np.array([runs[f'{seed}_seed']['kt'] for seed in range(10)])
            # "\\pm" yields the same text as before without relying on the
            # invalid "\p" escape sequence.
            line += f' & {round(np.mean(aux),4)} \\pm {round(np.std(aux),4)}'
        print(line)

In [None]:
realgraph = '2-soc-Epinions'
size = 100000
# Only the checkpoint after the 5th epoch (index 4) is stored in these files.
epoch = 4

# Print one LaTeX table row per training family:
# "<family> <graph> & mean \pm std" for each number of copies, where mean and
# std are taken over the Kendall-tau scores of seeds 0..9.
for graphtype in ['LFR','SF']:

    with open(f"{graphtype}_real_performance_full_{realgraph}_5_epochs.pickle",'rb') as f:
        data = pickle.load(f)

    line = graphtype+' '+realgraph
    for copies in [1,10,20,40]:
        runs = data[graphtype][f'{size}_size'][f'{copies}_copies']['pred'][f'{epoch}_epoch']
        aux = np.array([runs[f'{seed}_seed']['kt'] for seed in range(10)])
        # "\\pm" yields the same text as before without relying on the
        # invalid "\p" escape sequence.
        line += f' & {round(np.mean(aux),4)} \\pm {round(np.std(aux),4)}'
    print(line)

## read generated full results

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
graphtype = 'LFR'
realgraph = '1-wiki-Vote'
size = 10000

with open(f"{graphtype}_real_performance_full_{realgraph}.pickle", 'rb') as f:
    data = pickle.load(f)

data = data['LFR']
true = data[f"{size}_size"]['real']

# plotting_data[copies] = epoch indices, mean Kendall-tau and its standard
# deviation over seeds 0..14 for each of the 15 epochs.
plotting_data = {}

for c in [1, 10, 20, 40]:

    # Stored per (epoch, seed) as {'pred': ..., 'kt': ...}.
    per_epoch = data[f"{size}_size"][f"{c}_copies"]['pred']
    series = {'xs': [], 'ys': [], 'err': []}
    plotting_data[c] = series

    for epoch in range(15):
        kts = np.array([per_epoch[f'{epoch}_epoch'][f"{seed}_seed"]['kt'] for seed in range(15)])

        series['xs'].append(epoch)
        series['ys'].append(round(np.mean(kts), 4))
        series['err'].append(round(np.std(kts), 4))


import matplotlib.pyplot as plt

# One curve per number of copies: error bars, markers and a labelled line.
for c, series in plotting_data.items():
    xs = [epoch + 1 for epoch in series['xs']]  # epochs are shown 1-based
    ys = series['ys']
    err = series['err']
    plt.errorbar(xs, ys, err)
    plt.scatter(xs, ys)
    plt.plot(xs, ys, label=f'Copies: {c}')

plt.title(f"{realgraph} trained with {graphtype} graphs")
plt.xlabel("Number of training epochs")
plt.ylabel("KT Score")
plt.ylim(0.7, 1)
# xs still holds the 1-based epochs of the last curve.
plt.xticks(range(1, 16), xs)
plt.legend()
plt.show()

In [None]:
graphtype = 'SF'
realgraph = '1-wiki-Vote'
size = 10000

with open(f"{graphtype}_real_performance_full_{realgraph}.pickle",'rb') as f:
    data = pickle.load(f)

data = data['SF']
true = data[f"{size}_size"]['real']

plotting_data = {}

for c in [1,10,20,40]:
    
    plotting_data[c] = {'xs':[], 'ys':[]}

    for epoch in range(15):

        plotting_data[c]['xs'].append(epoch)
        
        aux = []
        
        for seed in range(15): #range(15):

            # {'pred':r['pred'],'kt':r["kt"]} -> schema
            pred = data[f"{size}_size"][f"{c}_copies"]['pred'][f'{epoch}_epoch'][f"{seed}_seed"]
            aux.append(pred['kt'])
            
        plotting_data[c]['ys'].append(round(np.mean(np.array(aux)),4))


import matplotlib.pyplot as plt

# One line per "copies" setting showing the mean KT score per epoch.
for c, series in plotting_data.items():
    ys = series['ys']
    # Stored epoch indices start at 0; the axis shows them starting at 1.
    xs = [epoch_idx + 1 for epoch_idx in series['xs']]
    plt.plot(xs, ys, label=f'Copies: {c}')

plt.title(f"{realgraph} trained with {graphtype} graphs")
plt.xlabel("Number of training epochs")
plt.ylabel("KT Score")
plt.ylim(0.7, 1)
# `xs` is the list left over from the last plotted series; it supplies the
# tick labels for positions 1..15.
plt.xticks(range(1, 16), xs)
plt.legend()
plt.show()

In [None]:
# Per-epoch mean and standard deviation of the 'kt' values for the
# LFR-trained models, restricted to a single "copies" setting.
graphtype = 'LFR'
realgraph = '1-wiki-Vote'
size = 10000

with open(f"{graphtype}_real_performance_full_{realgraph}.pickle",'rb') as f:
    data = pickle.load(f)['LFR']

# Restrict to the entry for the chosen size.
size_results = data[f"{size}_size"]
true = size_results['real']  # kept as a notebook variable; not read again here

# plotting_data[copies] -> {'xs': epoch indices, 'ys': mean kt, 'err': std of kt}
plotting_data = {}

for c in (10,):  # other cells sweep [1,10,20,40]

    runs = size_results[f"{c}_copies"]['pred']
    series = {'xs': [], 'ys': [], 'err': []}

    for epoch in range(15):
        # Each stored run follows the schema {'pred': ..., 'kt': ...}.
        kt_scores = np.array(
            [runs[f'{epoch}_epoch'][f"{seed}_seed"]['kt'] for seed in range(15)]
        )
        series['xs'].append(epoch)
        series['ys'].append(round(np.mean(kt_scores), 4))
        series['err'].append(round(np.std(kt_scores), 4))

    plotting_data[c] = series


# Draw the mean-KT curve of the current `graphtype` once per "copies" setting.
# The same series used to be plotted twice (once in the default colour, once
# in lightskyblue), which produced two legend entries for a single visible
# line. The label names the training graph family so the legend stays
# unambiguous when other curves are added to the same figure.
# The 'err' values stay available in plotting_data but are not drawn here.
for c in plotting_data:
    # Epochs are stored as 0-based indices; show them 1-based on the axis.
    xs = [j+1 for j in plotting_data[c]['xs']]
    ys = plotting_data[c]['ys']
    plt.plot(xs, ys, c='lightskyblue', label=f'{graphtype} {c} copies')

# Per-epoch mean and standard deviation of the 'kt' values for the
# SF-trained models, restricted to a single "copies" setting.
graphtype = 'SF'
realgraph = '1-wiki-Vote'
size = 10000

with open(f"{graphtype}_real_performance_full_{realgraph}.pickle",'rb') as f:
    data = pickle.load(f)['SF']

# Restrict to the entry for the chosen size.
size_results = data[f"{size}_size"]
true = size_results['real']  # kept as a notebook variable; not read again here

# plotting_data[copies] -> {'xs': epoch indices, 'ys': mean kt, 'err': std of kt}
plotting_data = {}

for c in (10,):  # other cells sweep [1,10,20,40]

    runs = size_results[f"{c}_copies"]['pred']
    series = {'xs': [], 'ys': [], 'err': []}

    for epoch in range(15):
        # Each stored run follows the schema {'pred': ..., 'kt': ...}.
        kt_scores = np.array(
            [runs[f'{epoch}_epoch'][f"{seed}_seed"]['kt'] for seed in range(15)]
        )
        series['xs'].append(epoch)
        series['ys'].append(round(np.mean(kt_scores), 4))
        series['err'].append(round(np.std(kt_scores), 4))

    plotting_data[c] = series

# Add the curve for the current `graphtype` (set to 'SF' just above) to the
# figure. Every plot call in this loop used to be commented out, so these
# statistics were computed but never drawn, while the title still announced
# this graph family.
for c in plotting_data:
    # Epochs are stored as 0-based indices; show them 1-based on the axis.
    xs = [j+1 for j in plotting_data[c]['xs']]
    ys = plotting_data[c]['ys']
    plt.plot(xs, ys, c='lightcoral', label=f'{graphtype} {c} copies')


# The figure holds curves for both training graph families, so the title names
# both instead of only the last value of `graphtype`.
plt.title(f"{realgraph} LFR vs SF trained graphs")
plt.xlabel("Number of training epochs")
plt.ylabel("KT Score")

# One tick per epoch 1..15; does not depend on `xs` leaking out of the loop.
plt.xticks(range(1, 16))
plt.legend()
plt.show()
plt.clf()

## test

In [None]:
import pickle
import matplotlib.pyplot as plt
import numpy as np

# Per-epoch mean and standard deviation of the 'kt' values for the
# LFR-trained models, restricted to a single "copies" setting.
graphtype = 'LFR'
realgraph = '1-wiki-Vote'
size = 10000

with open(f"{graphtype}_real_performance_full_{realgraph}.pickle",'rb') as f:
    data = pickle.load(f)['LFR']

# Restrict to the entry for the chosen size.
size_results = data[f"{size}_size"]
true = size_results['real']  # kept as a notebook variable; not read again here

# plotting_data[copies] -> {'xs': epoch indices, 'ys': mean kt, 'err': std of kt}
plotting_data = {}

for c in (40,):  # other cells sweep [1,10,20,40]

    runs = size_results[f"{c}_copies"]['pred']
    series = {'xs': [], 'ys': [], 'err': []}

    for epoch in range(15):
        # Each stored run follows the schema {'pred': ..., 'kt': ...}.
        kt_scores = np.array(
            [runs[f'{epoch}_epoch'][f"{seed}_seed"]['kt'] for seed in range(15)]
        )
        series['xs'].append(epoch)
        series['ys'].append(round(np.mean(kt_scores), 4))
        series['err'].append(round(np.std(kt_scores), 4))

    plotting_data[c] = series


# LFR-trained results: mean KT per epoch with the stored standard deviation as
# error bars, everything drawn in blue. Only the plain line carries a label.
for c, series in plotting_data.items():

    # Stored epoch indices start at 0; the axis shows them starting at 1.
    xs = [epoch_idx + 1 for epoch_idx in series['xs']]
    ys, err = series['ys'], series['err']
    plt.errorbar(xs, ys, yerr=err, c='b')
    plt.scatter(xs, ys, c='b')
    plt.plot(xs, ys, c='b', label=f'LFR, copies: {c}')

# Per-epoch mean and standard deviation of the 'kt' values for the
# SF-trained models, restricted to a single "copies" setting.
graphtype = 'SF'
realgraph = '1-wiki-Vote'
size = 10000

with open(f"{graphtype}_real_performance_full_{realgraph}.pickle",'rb') as f:
    data = pickle.load(f)['SF']

# Restrict to the entry for the chosen size.
size_results = data[f"{size}_size"]
true = size_results['real']  # kept as a notebook variable; not read again here

# plotting_data[copies] -> {'xs': epoch indices, 'ys': mean kt, 'err': std of kt}
plotting_data = {}

for c in (40,):  # other cells sweep [1,10,20,40]

    runs = size_results[f"{c}_copies"]['pred']
    series = {'xs': [], 'ys': [], 'err': []}

    for epoch in range(15):
        # Each stored run follows the schema {'pred': ..., 'kt': ...}.
        kt_scores = np.array(
            [runs[f'{epoch}_epoch'][f"{seed}_seed"]['kt'] for seed in range(15)]
        )
        series['xs'].append(epoch)
        series['ys'].append(round(np.mean(kt_scores), 4))
        series['err'].append(round(np.std(kt_scores), 4))

    plotting_data[c] = series

# SF-trained results: mean KT per epoch with the stored standard deviation as
# error bars, everything drawn in dark red. Only the plain line carries a label.
for c, series in plotting_data.items():
    # Stored epoch indices start at 0; the axis shows them starting at 1.
    xs = [epoch_idx + 1 for epoch_idx in series['xs']]
    ys, err = series['ys'], series['err']
    plt.errorbar(xs, ys, yerr=err, c='darkred')
    plt.scatter(xs, ys, c='darkred')
    plt.plot(xs, ys, c='darkred', label=f'SF, copies: {c}')


# Label the figure, then show it and clear it afterwards.
plt.title(f"{realgraph} LFR vs SF trained graphs")
plt.xlabel("Number of training epochs")
plt.ylabel("KT Score")

# `xs` is the list left over from the last plotted series; it supplies the
# tick labels for positions 1..15.
plt.xticks(range(1, 16), xs)
plt.legend()
plt.show()
plt.clf()