In [1]:
import os

from utils import *

# Generating the synthetic graphs and computing their centralities

In [5]:
# Seed Python's built-in RNG before generating graphs.
# NOTE(review): only `random` is seeded; if create_graph draws from another RNG
# (e.g. numpy) that generator needs its own seed — confirm in utils.
random.seed(10)

param = {
    "min_nodes": 5000,
    "max_nodes": 10000,
    "num_of_graphs": 10,
    "graph_types": ["ER","SF","GRP"]
}

# Create the output folder before any expensive work: without it the final
# open(..., "wb") raises FileNotFoundError and the computed centralities are lost.
os.makedirs("./graphs", exist_ok=True)

for graph_type in param["graph_types"]:
    print("###################")
    print(f"Generating graph type : {graph_type}")
    print(f"Number of graphs to be generated:{param['num_of_graphs']}")
    # One [graph, bet_dict, deg_dict] entry per generated graph of this type.
    list_bet_data = []
    print("Generating graphs and calculating centralities...")
    for i in range(param['num_of_graphs']):
        print(f"{datetime.now().strftime('%d/%m/%Y %H:%M:%S')}: Graph index:{i+1}/{param['num_of_graphs']}")
        g_nx = create_graph(graph_type,param['min_nodes'],param['max_nodes'])

        # Drop isolated nodes, then relabel so node ids are consecutive integers again.
        if nx.number_of_isolates(g_nx)>0:
            g_nx.remove_nodes_from(list(nx.isolates(g_nx)))
            g_nx = nx.convert_node_labels_to_integers(g_nx)
        # Both centralities are computed on the converted graph returned by nx2nkit.
        g_nkit = nx2nkit(g_nx)
        bet_dict = cal_exact_bet(g_nkit)
        deg_dict = cal_exact_degree(g_nkit)
        list_bet_data.append([g_nx,bet_dict,deg_dict])

    # All graphs of one type are stored together in a single pickle file.
    fname_bet = f"./graphs/{graph_type}_{param['num_of_graphs']}_graphs_{param['max_nodes']}_{param['min_nodes']}_nodes.pickle"

    with open(fname_bet,"wb") as fopen:
        pickle.dump(list_bet_data,fopen)

print("")
print("Graphs saved")

###################
Generating graph type : ER
Number of graphs to be generated:10
Generating graphs and calculating centralities...
27/03/2023 16:20:31: Graph index:1/10
27/03/2023 16:20:48: Graph index:2/10
27/03/2023 16:20:51: Graph index:3/10
27/03/2023 16:21:09: Graph index:4/10
27/03/2023 16:21:19: Graph index:5/10
27/03/2023 16:21:30: Graph index:6/10
27/03/2023 16:21:40: Graph index:7/10
27/03/2023 16:21:51: Graph index:8/10
27/03/2023 16:22:00: Graph index:9/10
27/03/2023 16:22:09: Graph index:10/10
###################
Generating graph type : SF
Number of graphs to be generated:10
Generating graphs and calculating centralities...
27/03/2023 16:22:15: Graph index:1/10
27/03/2023 16:22:31: Graph index:2/10
27/03/2023 16:22:40: Graph index:3/10
27/03/2023 16:22:52: Graph index:4/10
27/03/2023 16:23:00: Graph index:5/10
27/03/2023 16:23:13: Graph index:6/10
27/03/2023 16:23:30: Graph index:7/10
27/03/2023 16:23:55: Graph index:8/10
27/03/2023 16:24:17: Graph index:9/10
27/03/2023 

# Creating datasets

In [12]:
# Seed Python's built-in RNG before building the datasets.
random.seed(10)

param = {
    "adj_size" : 10000,
    "num_train" : 5,
    "num_test" : 0,
    "num_copies": [10],#[1,2,10,20,40],
    # Number of graph-set files per entry in "files" (suffixes _0 ... _{num_sets-1}).
    "num_sets" : 5,
    "files" : ["ER_5_graphs_10000_5000_nodes.pickle",
               "SF_5_graphs_10000_5000_nodes.pickle",
               "GRP_5_graphs_10000_5000_nodes.pickle"]
}

# Make sure the output folders exist before any dataset is built, so a missing
# directory cannot throw away the work at write time. Only the folders for the
# splits that are actually requested are created.
if param["num_train"] > 0:
    os.makedirs("./data_splits/train", exist_ok=True)
if param["num_test"] > 0:
    os.makedirs("./data_splits/test", exist_ok=True)

for file in param["files"]:

    # File name without its extension; identical to file[:-7] for "*.pickle" names,
    # but does not silently mangle the name if the extension ever differs.
    file_stem = os.path.splitext(file)[0]

    for num_copies in param["num_copies"]:

        for n in range(param["num_sets"]):

            # NOTE: pickle.load executes arbitrary code from the file; only load
            # pickles produced by this project.
            with open(f"./graphs/{file_stem}_{n}.pickle","rb") as fopen:
                list_data = pickle.load(fopen)

            num_graph = len(list_data)
            assert param["num_train"]+param["num_test"] == num_graph,"Required split size doesn't match number of graphs in pickle file."

            #For training split: the first num_train graphs, with num_copies passed to create_dataset
            if param["num_train"] > 0:
                list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[:param["num_train"]],num_copies = num_copies, adj_size=param["adj_size"])

                with open(f"./data_splits/train/{file_stem}_{n}_{num_copies}_copies_{param['num_train']}_train_{n}_setgraph.pickle","wb") as fopen:
                    pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)

            #For test split: the graphs following the training ones, always with num_copies = 1
            if param["num_test"] > 0:
                list_graph, list_n_sequence, list_node_num, cent_mat, deg_mat = create_dataset(list_data[param["num_train"]:param["num_train"]+param["num_test"]],num_copies = 1,adj_size=param["adj_size"])

                with open(f"./data_splits/test/{file_stem}_{n}_{param['adj_size']}_size.pickle","wb") as fopen:
                    pickle.dump([list_graph,list_n_sequence,list_node_num,cent_mat, deg_mat],fopen)


# Analysing model performance on synthetic graphs

In [13]:
 
from model_bet import *
import pandas as pd

# Per-epoch evaluation records; every key becomes one column of the CSV written below.
Results = {
    column: []
    for column in ("gtype", "segraph", "copies", "epochs", "kendalltau", "std")
}

for gtype in ["GRP","ER","SF"]:

    for itersplit in range(5):

        print(f"{gtype}, {itersplit}")

        # Test data for this graph type (re-read from disk for every split).
        with open(f"./data_splits/test/{gtype}_10_graphs_10000_5000_nodes_10000_size.pickle","rb") as fopen:
            (list_graph_test,
             list_n_seq_test,
             list_num_node_test,
             bc_mat_test,
             deg_mat_test) = pickle.load(fopen)

        data_path = f"{gtype}_5_graphs_10000_5000_nodes_{itersplit}_10_copies_5_train_{itersplit}_setgraph.pickle"

        # Training data for the current graph type / split.
        print("Loading data...")
        with open(f"./data_splits/train/{data_path}","rb") as fopen:
            (list_graph_train,
             list_n_seq_train,
             list_num_node_train,
             bc_mat_train,
             deg_mat_train) = pickle.load(fopen)

        # The model input size is taken from the first dimension of the training matrix.
        model_size = bc_mat_train.shape[0]
        print(f"Model size: {model_size}")

        # Convert both graph lists with graph_to_adj_bet, using the same model_size.
        print("Graphs to adjacency conversion.")

        list_adj_train,list_adj_t_train = graph_to_adj_bet(
            list_graph_train,list_n_seq_train,list_num_node_train,model_size)
        list_adj_test,list_adj_t_test = graph_to_adj_bet(
            list_graph_test,list_n_seq_test,list_num_node_test,model_size)

        # Model set-up. The torch seed is reset before each model is built, so every
        # (graph type, split) run starts from the same torch RNG state.
        torch.manual_seed(15)

        hidden = 20

        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
        model = GNN_Bet(ninput=model_size,nhid=hidden,dropout=0.6)
        model.to(device)

        optimizer = torch.optim.Adam(model.parameters(),lr=0.0005)
        num_epoch = 10

        print("Training")
        print(f"Total Number of epoches: {num_epoch}")
        for e in range(num_epoch):
            print(f"Epoch number: {e+1}/{num_epoch}")
            train(list_adj_train,list_adj_t_train,list_num_node_train,bc_mat_train,model,device,optimizer,model_size)

            # Evaluate on the test graphs after every epoch, without gradient tracking.
            with torch.no_grad():
                r = test(list_adj_test,list_adj_t_test,list_num_node_test,bc_mat_test,deg_mat_test,model,device,model_size)

            # Record one row per epoch; "epochs" stores the 0-based loop index.
            for res_key, res_val in (
                ("gtype", gtype),
                ("segraph", itersplit),
                ("copies", 10),
                ("epochs", e),
                ("kendalltau", r["kt"]),
                ("std", r["std"]),
            ):
                Results[res_key].append(res_val)

            # Rewrite the CSV after each epoch so partial results survive an interruption.
            df = pd.DataFrame(Results)
            df.to_csv("output_synthetic_graphs_performance_different_setgraphs.csv")


GRP, 0
Loading data...
Model size: 10000
Graphs to adjacency conversion.
Processing graphs: 50/50
Processing graphs: 10/10
Training
Total Number of epoches: 10
Epoch number: 1/10
   Average KT score on test graphs is: 0.7909051637973425 and std: 0.02087656159741351
Epoch number: 2/10
   Average KT score on test graphs is: 0.8447206607430584 and std: 0.01752277162003258
Epoch number: 3/10
   Average KT score on test graphs is: 0.8596758306646967 and std: 0.011544774378721614
Epoch number: 4/10
   Average KT score on test graphs is: 0.8696262512731533 and std: 0.00956230524462795
Epoch number: 5/10
   Average KT score on test graphs is: 0.8770581224238576 and std: 0.008599373178355744
Epoch number: 6/10
   Average KT score on test graphs is: 0.8836344879586802 and std: 0.008590855226052511
Epoch number: 7/10
   Average KT score on test graphs is: 0.8872995109245322 and std: 0.007616972576032844
Epoch number: 8/10
   Average KT score on test graphs is: 0.891976889012015 and std: 0.0079619