In [1]:
from functions.utils import *

ModuleNotFoundError: No module named 'functions'

## We generate the synthetic graphs

In [None]:


param = {
    "min_nodes": 5000,
    "max_nodes": 10000,
    "num_of_graphs": 15,
    "graph_types": ["ER","SF","GRP"],
    "generation_seeds": [10]
}

for graph_type in param["graph_types"]:

    for seed in param["generation_seeds"]:
        
        random.seed(seed)

        print(f"Generating {param['num_of_graphs']} {graph_type} graphs")
        list_bet_data = list()
        for i in range(param['num_of_graphs']):
            print(f"{datetime.now().strftime('%d/%m/%Y %H:%M:%S')}: Graph index:{i+1}/{param['num_of_graphs']}")
            g_nx = create_graph(graph_type,param['min_nodes'],param['max_nodes'])
            
            if nx.number_of_isolates(g_nx)>0:
                g_nx.remove_nodes_from(list(nx.isolates(g_nx)))
                g_nx = nx.convert_node_labels_to_integers(g_nx)

            g_nkit = nx2nkit(g_nx)
            bet_dict = cal_exact_bet(g_nkit)
            deg_dict = cal_exact_degree(g_nkit)
            list_bet_data.append([g_nx,bet_dict,deg_dict])

        fname_bet = f"./delivery/graphs/{graph_type}_{param['num_of_graphs']}_graphs_{param['max_nodes']}_{param['min_nodes']}_nodes_{seed}_genseed.pickle"    

        with open(fname_bet,"wb") as fopen:
            pickle.dump(list_bet_data,fopen)

print("Graphs saved")

## Creating datasets

In [None]:
param = {
    "adj_size" : 10000,
    "num_train" : 5,
    "num_test" : 10,
    "num_copies": [100],
    "graph_files": ["ER_15_graphs_10000_5000_nodes",
                    "SF_15_graphs_10000_5000_nodes",
                    "GRP_15_graphs_10000_5000_nodes"],
    "split_seeds": [10],
    "generation_seeds": [10]
}


for graph_file in param["graph_files"]:
    for genseed in param["generation_seeds"]:
        for num_copies in param["num_copies"]:
            for splitseed in param["split_seeds"]:
        
                with open(f"./delivery/graphs/{graph_file}_{genseed}_genseed.pickle","rb") as fopen:
                    list_data = pickle.load(fopen)

                num_graph = len(list_data)
                assert param["num_train"]+param["num_test"] == num_graph,"Required split size doesn't match number of graphs in pickle file."
            
                #For training split
                if param["num_train"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[:param["num_train"]],num_copies = num_copies, adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/train/{graph_file}_{genseed}_genseed_{param['num_train']}_train_{num_copies}_copies_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)

                #For test split
                if param["num_test"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[param["num_train"]:param["num_train"]+param["num_test"]],num_copies = 1,adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/test/{graph_file}_{genseed}_genseed_{param['num_test']}_test_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)


# Analysing synthetic graphs performance

In [4]:
 
from model_bet import *
import pandas as pd

param = {
    "graph_files": ["ER_15_graphs_10000_5000_nodes",
                    "SF_15_graphs_10000_5000_nodes",
                    "GRP_15_graphs_10000_5000_nodes"],

    "generation_seeds": [10],
    "split_seeds": [10],
    "num_copies": [100],
    "adj_size" : 10000,
    "num_train" : 5,
    "num_test" : 10,
    "model_seeds": [15],
    "num_epochs": 15,
}

Results = { "gtype_train":[],
            "generation_seed":[],
            "splilt_seed": [],
            "copies":[],
            "adj_size": [],
            "model_seed": [],
            "epochs": [],
            "kendalltau":[],
            "std":[]}

for graph_file in param["graph_files"]:
    for genseed in param["generation_seeds"]:
        for splitseed in param["split_seeds"]:
            
            test_file = f"{graph_file}_{genseed}_genseed_{param['num_test']}_test_{param['adj_size']}_size_{splitseed}_splitseed.pickle"
            #Load test data
            with open("./delivery/data_splits/test/"+test_file,"rb") as fopen:
                list_graph_test,list_n_seq_test,list_num_node_test,bc_mat_test,deg_mat_test = pickle.load(fopen)

            list_adj_test,list_adj_t_test = graph_to_adj_bet(list_graph_test,list_n_seq_test,list_num_node_test,param['adj_size'])

            for num_copies in param["num_copies"]:

                train_file = f"{graph_file}_{genseed}_genseed_{param['num_train']}_train_{num_copies}_copies_{param['adj_size']}_size_{splitseed}_splitseed.pickle"
                #Load training data
                print(f"Loading data...")
                with open("./delivery/data_splits/train/"+train_file,"rb") as fopen:
                    list_graph_train,list_n_seq_train,list_num_node_train,bc_mat_train,deg_mat_train = pickle.load(fopen)

                model_size = bc_mat_train.shape[0]
                assert model_size == param['adj_size']
                
                list_adj_train,list_adj_t_train = graph_to_adj_bet(list_graph_train,list_n_seq_train,list_num_node_train,param['adj_size'])
                
                for model_seed in param["model_seeds"]:
                    #Model parameters

                    torch.manual_seed(model_seed)
                    
                    hidden = 20
                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                    model = GNN_Bet(ninput=model_size,nhid=hidden,dropout=0.6)
                    model.to(device)

                    optimizer = torch.optim.Adam(model.parameters(),lr=0.0005)
                    num_epoch = param["num_epochs"]

                    print(f"Training, total Number of epoches: {num_epoch}")
                    for e in range(num_epoch):
                        print(f"Epoch number: {e+1}/{num_epoch}")
                        train(list_adj_train,list_adj_t_train,list_num_node_train,bc_mat_train,model,device,optimizer,model_size)

                        #to check test loss while training
                        with torch.no_grad():
                            r = test(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,deg_mat_test,model,device,model_size)

                        Results["gtype_train"].append(train_file)
                        Results["generation_seed"].append(genseed)
                        Results["splilt_seed"].append(splitseed)
                        Results["copies"].append(num_copies)
                        Results["adj_size"].append(model_size)
                        Results["model_seed"].append(model_seed)
                        Results["epochs"].append(e)
                        Results["kendalltau"].append(r["kt"])
                        Results["std"].append(r["std"])

                        df = pd.DataFrame.from_dict(Results)
                        #df.to_csv("output_synthetic_graphs_performance.csv")
                        df.to_csv("./delivery/output_synthetic_graphs_performance_exp1.csv")


Processing 10 graphs...
Loading data...
Processing 25 graphs...
Training, total Number of epoches: 15
Epoch number: 1/15
   Average KT score on test graphs is: 0.6433707281240552 and std: 0.07823708944609986
Epoch number: 2/15
   Average KT score on test graphs is: 0.6848323000762395 and std: 0.0662049125692346
Epoch number: 3/15
   Average KT score on test graphs is: 0.7433603418164613 and std: 0.06145596375996941
Epoch number: 4/15
   Average KT score on test graphs is: 0.7836446000032846 and std: 0.06042670194897764
Epoch number: 5/15
   Average KT score on test graphs is: 0.8020562728130729 and std: 0.06122871859264953
Epoch number: 6/15
   Average KT score on test graphs is: 0.8115943377123658 and std: 0.06103639687297047
Epoch number: 7/15
   Average KT score on test graphs is: 0.8188163937644557 and std: 0.060641768160102805
Epoch number: 8/15
   Average KT score on test graphs is: 0.8243984219262295 and std: 0.06012270416232539
Epoch number: 9/15
   Average KT score on test gra

KeyboardInterrupt: 

## Creating datasets varying replication parameter

In [5]:
param = {
    "adj_size" : 10000,
    "num_train" : 5,
    "num_test" : 10,
    "num_copies": [1,2,10,20,40],
    "graph_files": ["ER_15_graphs_10000_5000_nodes",
                    "SF_15_graphs_10000_5000_nodes",
                    "GRP_15_graphs_10000_5000_nodes"],
    "split_seeds": [10],
    "generation_seeds": [10]
}


for graph_file in param["graph_files"]:
    for genseed in param["generation_seeds"]:
        for num_copies in param["num_copies"]:
            for splitseed in param["split_seeds"]:
        
                with open(f"./delivery/graphs/{graph_file}_{genseed}_genseed.pickle","rb") as fopen:
                    list_data = pickle.load(fopen)

                num_graph = len(list_data)
                assert param["num_train"]+param["num_test"] == num_graph,"Required split size doesn't match number of graphs in pickle file."
            
                #For training split
                if param["num_train"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[:param["num_train"]],num_copies = num_copies, adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/train/{graph_file}_{genseed}_genseed_{param['num_train']}_train_{num_copies}_copies_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)

                #For test split
                if param["num_test"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[param["num_train"]:param["num_train"]+param["num_test"]],num_copies = 1,adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/test/{graph_file}_{genseed}_genseed_{param['num_test']}_test_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)


## Analysing performance when varying the replication parameter

In [6]:
 
from model_bet import *
import pandas as pd

param = {
    "graph_files": ["ER_15_graphs_10000_5000_nodes",
                    "SF_15_graphs_10000_5000_nodes",
                    "GRP_15_graphs_10000_5000_nodes"],

    "generation_seeds": [10],
    "split_seeds": [10],
    "num_copies": [1,2,10,20,40],
    "adj_size" : 10000,
    "num_train" : 5,
    "num_test" : 10,
    "model_seeds": [15],
    "num_epochs": 15,
}

Results = { "gtype_train":[],
            "generation_seed":[],
            "splilt_seed": [],
            "copies":[],
            "adj_size": [],
            "model_seed": [],
            "epochs": [],
            "kendalltau":[],
            "std":[]}

for graph_file in param["graph_files"]:
    for genseed in param["generation_seeds"]:
        for splitseed in param["split_seeds"]:
            
            test_file = f"{graph_file}_{genseed}_genseed_{param['num_test']}_test_{param['adj_size']}_size_{splitseed}_splitseed.pickle"
            #Load test data
            with open("./delivery/data_splits/test/"+test_file,"rb") as fopen:
                list_graph_test,list_n_seq_test,list_num_node_test,bc_mat_test,deg_mat_test = pickle.load(fopen)

            list_adj_test,list_adj_t_test = graph_to_adj_bet(list_graph_test,list_n_seq_test,list_num_node_test,param['adj_size'])

            for num_copies in param["num_copies"]:

                train_file = f"{graph_file}_{genseed}_genseed_{param['num_train']}_train_{num_copies}_copies_{param['adj_size']}_size_{splitseed}_splitseed.pickle"
                #Load training data
                print(f"Loading data...")
                with open("./delivery/data_splits/train/"+train_file,"rb") as fopen:
                    list_graph_train,list_n_seq_train,list_num_node_train,bc_mat_train,deg_mat_train = pickle.load(fopen)

                model_size = bc_mat_train.shape[0]
                assert model_size == param['adj_size']
                
                list_adj_train,list_adj_t_train = graph_to_adj_bet(list_graph_train,list_n_seq_train,list_num_node_train,param['adj_size'])
                
                for model_seed in param["model_seeds"]:
                    #Model parameters

                    torch.manual_seed(model_seed)
                    
                    hidden = 20
                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                    model = GNN_Bet(ninput=model_size,nhid=hidden,dropout=0.6)
                    model.to(device)

                    optimizer = torch.optim.Adam(model.parameters(),lr=0.0005)
                    num_epoch = param["num_epochs"]

                    print(f"Training, total Number of epoches: {num_epoch}")
                    for e in range(num_epoch):
                        print(f"Epoch number: {e+1}/{num_epoch}")
                        train(list_adj_train,list_adj_t_train,list_num_node_train,bc_mat_train,model,device,optimizer,model_size)

                        #to check test loss while training
                        with torch.no_grad():
                            r = test(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,deg_mat_test,model,device,model_size)

                        Results["gtype_train"].append(train_file)
                        Results["generation_seed"].append(genseed)
                        Results["splilt_seed"].append(splitseed)
                        Results["copies"].append(num_copies)
                        Results["adj_size"].append(model_size)
                        Results["model_seed"].append(model_seed)
                        Results["epochs"].append(e)
                        Results["kendalltau"].append(r["kt"])
                        Results["std"].append(r["std"])

                        df = pd.DataFrame.from_dict(Results)
                        df.to_csv("./delivery/output_synthetic_graphs_performance_exp2.csv")


Processing 10 graphs...
Loading data...
Processing 25 graphs...
Training, total Number of epoches: 15
Epoch number: 1/15
   Average KT score on test graphs is: 0.6433707281240552 and std: 0.07823708944609986
Epoch number: 2/15
   Average KT score on test graphs is: 0.6848323000762395 and std: 0.0662049125692346
Epoch number: 3/15
   Average KT score on test graphs is: 0.7433603418164613 and std: 0.06145596375996941
Epoch number: 4/15
   Average KT score on test graphs is: 0.7836446000032846 and std: 0.06042670194897764
Epoch number: 5/15
   Average KT score on test graphs is: 0.8020562728130729 and std: 0.06122871859264953
Epoch number: 6/15
   Average KT score on test graphs is: 0.8115943377123658 and std: 0.06103639687297047
Epoch number: 7/15
   Average KT score on test graphs is: 0.8188163937644557 and std: 0.060641768160102805
Epoch number: 8/15
   Average KT score on test graphs is: 0.8243984219262295 and std: 0.06012270416232539
Epoch number: 9/15
   Average KT score on test gra

## We generate a set of 10 synthetic graphs for testing

In [9]:


param = {
    "min_nodes": 1000,#5000,
    "max_nodes": 2000,#10000,
    "num_of_graphs": 10,
    "graph_types": ["ER","SF","GRP"],
    "generation_seeds": [10]
}

for graph_type in param["graph_types"]:

    for seed in param["generation_seeds"]:
        
        random.seed(seed)

        print(f"Generating {param['num_of_graphs']} {graph_type} graphs")
        list_bet_data = list()
        for i in range(param['num_of_graphs']):
            print(f"{datetime.now().strftime('%d/%m/%Y %H:%M:%S')}: Graph index:{i+1}/{param['num_of_graphs']}")
            g_nx = create_graph(graph_type,param['min_nodes'],param['max_nodes'])
            
            if nx.number_of_isolates(g_nx)>0:
                g_nx.remove_nodes_from(list(nx.isolates(g_nx)))
                g_nx = nx.convert_node_labels_to_integers(g_nx)

            g_nkit = nx2nkit(g_nx)
            bet_dict = cal_exact_bet(g_nkit)
            deg_dict = cal_exact_degree(g_nkit)
            list_bet_data.append([g_nx,bet_dict,deg_dict])

        fname_bet = f"./delivery/graphs/{graph_type}_{param['num_of_graphs']}_graphs_{param['max_nodes']}_{param['min_nodes']}_nodes_{seed}_genseed.pickle"    

        with open(fname_bet,"wb") as fopen:
            pickle.dump(list_bet_data,fopen)

print("Graphs saved")

Generating 10 ER graphs
26/04/2023 16:40:22: Graph index:1/10
26/04/2023 16:40:22: Graph index:2/10
26/04/2023 16:40:22: Graph index:3/10
26/04/2023 16:40:22: Graph index:4/10
26/04/2023 16:40:22: Graph index:5/10
26/04/2023 16:40:22: Graph index:6/10
26/04/2023 16:40:22: Graph index:7/10
26/04/2023 16:40:23: Graph index:8/10
26/04/2023 16:40:23: Graph index:9/10
26/04/2023 16:40:23: Graph index:10/10
Generating 10 SF graphs
26/04/2023 16:40:23: Graph index:1/10
26/04/2023 16:40:24: Graph index:2/10
26/04/2023 16:40:24: Graph index:3/10
26/04/2023 16:40:25: Graph index:4/10
26/04/2023 16:40:26: Graph index:5/10
26/04/2023 16:40:26: Graph index:6/10
26/04/2023 16:40:27: Graph index:7/10
26/04/2023 16:40:28: Graph index:8/10
26/04/2023 16:40:29: Graph index:9/10
26/04/2023 16:40:30: Graph index:10/10
Generating 10 GRP graphs
26/04/2023 16:40:30: Graph index:1/10
26/04/2023 16:40:31: Graph index:2/10
26/04/2023 16:40:31: Graph index:3/10
26/04/2023 16:40:31: Graph index:4/10
26/04/2023 16

## We generate a set of 5 synthetic training graphs for training using different random seeds

In [10]:


param = {
    "min_nodes": 1000,#5000,
    "max_nodes": 2000,#10000,
    "num_of_graphs": 5,
    "graph_types": ["ER","SF","GRP"],
    "generation_seeds": [j for j in range(5)]
}

for graph_type in param["graph_types"]:

    for seed in param["generation_seeds"]:
        
        #random.seed(seed)

        print(f"Generating {param['num_of_graphs']} {graph_type} graphs")
        list_bet_data = list()
        for i in range(param['num_of_graphs']):
            print(f"{datetime.now().strftime('%d/%m/%Y %H:%M:%S')}: Graph index:{i+1}/{param['num_of_graphs']}")
            g_nx = create_graph(graph_type,param['min_nodes'],param['max_nodes'])
            
            if nx.number_of_isolates(g_nx)>0:
                g_nx.remove_nodes_from(list(nx.isolates(g_nx)))
                g_nx = nx.convert_node_labels_to_integers(g_nx)

            g_nkit = nx2nkit(g_nx)
            bet_dict = cal_exact_bet(g_nkit)
            deg_dict = cal_exact_degree(g_nkit)
            list_bet_data.append([g_nx,bet_dict,deg_dict])

        fname_bet = f"./delivery/graphs/{graph_type}_{param['num_of_graphs']}_graphs_{param['max_nodes']}_{param['min_nodes']}_nodes_{seed}_genseed.pickle"    

        with open(fname_bet,"wb") as fopen:
            pickle.dump(list_bet_data,fopen)

print("Graphs saved")

Generating 5 ER graphs
26/04/2023 16:40:37: Graph index:1/5
26/04/2023 16:40:38: Graph index:2/5
26/04/2023 16:40:38: Graph index:3/5
26/04/2023 16:40:38: Graph index:4/5
26/04/2023 16:40:38: Graph index:5/5
Generating 5 ER graphs
26/04/2023 16:40:39: Graph index:1/5
26/04/2023 16:40:39: Graph index:2/5
26/04/2023 16:40:39: Graph index:3/5
26/04/2023 16:40:39: Graph index:4/5
26/04/2023 16:40:39: Graph index:5/5
Generating 5 ER graphs
26/04/2023 16:40:39: Graph index:1/5
26/04/2023 16:40:39: Graph index:2/5
26/04/2023 16:40:39: Graph index:3/5
26/04/2023 16:40:39: Graph index:4/5
26/04/2023 16:40:40: Graph index:5/5
Generating 5 ER graphs
26/04/2023 16:40:40: Graph index:1/5
26/04/2023 16:40:40: Graph index:2/5
26/04/2023 16:40:40: Graph index:3/5
26/04/2023 16:40:40: Graph index:4/5
26/04/2023 16:40:40: Graph index:5/5
Generating 5 ER graphs
26/04/2023 16:40:40: Graph index:1/5
26/04/2023 16:40:41: Graph index:2/5
26/04/2023 16:40:41: Graph index:3/5
26/04/2023 16:40:41: Graph index:4

## We generate the test splilt

In [19]:
param = {
    "adj_size" : 10000,
    "num_train" : 0,
    "num_test" : 10,
    "num_copies": [10],
    "graph_files": ["ER_10_graphs_2000_1000_nodes",
                    "SF_10_graphs_2000_1000_nodes",
                    "GRP_10_graphs_2000_1000_nodes"],
    "split_seeds": [0],
    "generation_seeds": [10]
}


for graph_file in param["graph_files"]:
    for genseed in param["generation_seeds"]:
        for num_copies in param["num_copies"]:
            for splitseed in param["split_seeds"]:
        
                with open(f"./delivery/graphs/{graph_file}_{genseed}_genseed.pickle","rb") as fopen:
                    list_data = pickle.load(fopen)

                num_graph = len(list_data)
                assert param["num_train"]+param["num_test"] == num_graph,"Required split size doesn't match number of graphs in pickle file."
            
                #For training split
                if param["num_train"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[:param["num_train"]],num_copies = num_copies, adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/train/{graph_file}_{genseed}_genseed_{param['num_train']}_train_{num_copies}_copies_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)

                #For test split
                if param["num_test"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[param["num_train"]:param["num_train"]+param["num_test"]],num_copies = 1,adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/test/{graph_file}_{genseed}_genseed_{param['num_test']}_test_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)


## We generate the splits for the training sets with different random seed generation

In [20]:
param = {
    "adj_size" : 10000,
    "num_train" : 5,
    "num_test" : 0,
    "num_copies": [10],
    "graph_files": ["ER_5_graphs_2000_1000_nodes",
                    "SF_5_graphs_2000_1000_nodes",
                    "GRP_5_graphs_2000_1000_nodes"],
    "split_seeds": [j for j in range(5)],
    "generation_seeds": [0]
}


for graph_file in param["graph_files"]:
    for genseed in param["generation_seeds"]:
        for num_copies in param["num_copies"]:
            for splitseed in param["split_seeds"]:
        
                with open(f"./delivery/graphs/{graph_file}_{genseed}_genseed.pickle","rb") as fopen:
                    list_data = pickle.load(fopen)

                num_graph = len(list_data)
                assert param["num_train"]+param["num_test"] == num_graph,"Required split size doesn't match number of graphs in pickle file."
            
                #For training split
                if param["num_train"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[:param["num_train"]],num_copies = num_copies, adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/train/{graph_file}_{genseed}_genseed_{param['num_train']}_train_{num_copies}_copies_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)

                #For test split
                if param["num_test"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[param["num_train"]:param["num_train"]+param["num_test"]],num_copies = 1,adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/test/{graph_file}_{genseed}_genseed_{param['num_test']}_test_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)


## We train and test using the diferent graphs created

In [21]:
param = {
    "adj_size" : 10000,
    "num_train" : 5,
    "num_test" : 0,
    "num_copies": [10],
    "graph_files": ["ER_5_graphs_2000_1000_nodes",
                    "SF_5_graphs_2000_1000_nodes",
                    "GRP_5_graphs_2000_1000_nodes"],
    "split_seeds": [0],
    "generation_seeds": [j for j in range(5)]
}


for graph_file in param["graph_files"]:
    for genseed in param["generation_seeds"]:
        for num_copies in param["num_copies"]:
            for splitseed in param["split_seeds"]:
        
                with open(f"./delivery/graphs/{graph_file}_{genseed}_genseed.pickle","rb") as fopen:
                    list_data = pickle.load(fopen)

                num_graph = len(list_data)
                assert param["num_train"]+param["num_test"] == num_graph,"Required split size doesn't match number of graphs in pickle file."
            
                #For training split
                if param["num_train"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[:param["num_train"]],num_copies = num_copies, adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/train/{graph_file}_{genseed}_genseed_{param['num_train']}_train_{num_copies}_copies_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)

                #For test split
                if param["num_test"] > 0:
                    random.seed(splitseed)
                    list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[param["num_train"]:param["num_train"]+param["num_test"]],num_copies = 1,adj_size=param["adj_size"])

                    with open(f"./delivery/data_splits/test/{graph_file}_{genseed}_genseed_{param['num_test']}_test_{param['adj_size']}_size_{splitseed}_splitseed.pickle","wb") as fopen:
                        pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)


## We train and test using the different graph created

In [25]:
 
from model_bet import *
import pandas as pd

param = {
    "graph_types": ["ER","SF","GRP"],
    "graphs_sizes": "2000_1000_nodes",
    "test_generation_seeds": [10],
    "train_generation_seeds": [j for j in range(5)],
    "test_split_seeds": [0],
    "train_split_seeds": [0],
    "num_copies": [10],
    "adj_size" : 10000,
    "num_train" : 5,
    "num_test" : 10,
    "model_seeds": [15],
    "num_epochs": 15,
}

Results = { "gtype_train":[],
            "train_generation_seed": [],
            "train_splilt_seed": [],
            "test_generation_seed": [],
            "test_splilt_seed": [],
            "copies":[],
            "adj_size": [],
            "model_seed": [],
            "epochs": [],
            "kendalltau":[],
            "std":[]}





for testgenseed in param["test_generation_seeds"]:
    for testsplitseed in param["test_split_seeds"]:
        
        test_file = f"{graph_type}_{param['num_test']}_graphs_{param['graphs_sizes']}_{testgenseed}_genseed_{param['num_test']}_test_{param['adj_size']}_size_{testsplitseed}_splitseed.pickle"
        #Load test data
        with open("./delivery/data_splits/test/"+test_file,"rb") as fopen:
            list_graph_test,list_n_seq_test,list_num_node_test,bc_mat_test,deg_mat_test = pickle.load(fopen)

        list_adj_test,list_adj_t_test = graph_to_adj_bet(list_graph_test,list_n_seq_test,list_num_node_test,param['adj_size'])

        for graph_type in param["graph_types"]:
            for traingenseed in param["train_generation_seeds"]:
                for trainsplitseed in param["train_split_seeds"]:
                    for num_copies in param["num_copies"]:
                        
                        train_file = f"{graph_type}_{param['num_train']}_graphs_{param['graphs_sizes']}_{traingenseed}_genseed_{param['num_train']}_train_{num_copies}_copies_{param['adj_size']}_size_{trainsplitseed}_splitseed.pickle"

                        #Load training data
                        print(f"Loading data...")
                        with open("./delivery/data_splits/train/"+train_file,"rb") as fopen:
                            list_graph_train,list_n_seq_train,list_num_node_train,bc_mat_train,deg_mat_train = pickle.load(fopen)

                        list_adj_train,list_adj_t_train = graph_to_adj_bet(list_graph_train,list_n_seq_train,list_num_node_train,param['adj_size'])

                        model_size = bc_mat_train.shape[0]
                        assert model_size == param['adj_size']
                        
                        for model_seed in param["model_seeds"]:
                            #Model parameters

                            torch.manual_seed(model_seed)
                            
                            hidden = 20
                            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                            model = GNN_Bet(ninput=model_size,nhid=hidden,dropout=0.6)
                            model.to(device)

                            optimizer = torch.optim.Adam(model.parameters(),lr=0.0005)
                            num_epoch = param["num_epochs"]

                            print(f"Training, total Number of epoches: {num_epoch}")
                            for e in range(num_epoch):
                                print(f"Epoch number: {e+1}/{num_epoch}")
                                train(list_adj_train,list_adj_t_train,list_num_node_train,bc_mat_train,model,device,optimizer,model_size)

                                #to check test loss while training
                                with torch.no_grad():
                                    r = test(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,deg_mat_test,model,device,model_size)

                                Results["gtype_train"].append(train_file)
                                Results["train_generation_seed"].append(traingenseed)
                                Results["train_splilt_seed"].append(trainsplitseed)
                                Results["test_generation_seed"].append(testgenseed)
                                Results["test_splilt_seed"].append(testsplitseed)
                                Results["copies"].append(num_copies)
                                Results["adj_size"].append(model_size)
                                Results["model_seed"].append(model_seed)
                                Results["epochs"].append(e)
                                Results["kendalltau"].append(r["kt"])
                                Results["std"].append(r["std"])

                                df = pd.DataFrame.from_dict(Results)
                                df.to_csv("./delivery/output_synthetic_graphs_performance_variating_random_seeds.csv")


Processing 10 graphs...
Loading data...
Processing 50 graphs...
Training, total Number of epoches: 15
Epoch number: 1/15
   Average KT score on test graphs is: 0.7467893824796327 and std: 0.07645942572944546
Epoch number: 2/15
   Average KT score on test graphs is: 0.8131740429052442 and std: 0.05670450560938412
Epoch number: 3/15
   Average KT score on test graphs is: 0.834524012195136 and std: 0.055753792869293536
Epoch number: 4/15
   Average KT score on test graphs is: 0.8453164649658869 and std: 0.05352057238808083
Epoch number: 5/15
   Average KT score on test graphs is: 0.8539757671010759 and std: 0.052870334949941575
Epoch number: 6/15
   Average KT score on test graphs is: 0.8602889009741974 and std: 0.0524043557951551
Epoch number: 7/15
   Average KT score on test graphs is: 0.8650949217734029 and std: 0.05220560815431934
Epoch number: 8/15
   Average KT score on test graphs is: 0.8690275935563163 and std: 0.05249874241607539
Epoch number: 9/15
   Average KT score on test gra

KeyboardInterrupt: 