In [2]:
import os
import pickle
import pandas as pd
import random
import networkx as nx

from src.envs.utils import (
                            RandomErdosRenyiGraphGenerator,
                            RandomBarabasiAlbertGraphGenerator,
                            RandomRegularGraphGenerator,
                            RandomWattsStrogatzGraphGenerator,
                            RandomHomleKimGraphGenerator,
                            RandomCompleteGraphGenerator,
                            EdgeType,
                            )
from scipy.sparse import csr_matrix,save_npz,load_npz
from experiments.utils import load_graph_set, mk_dir


# Extra Distributions

In [5]:
def generate_dataset(num_train=4000,num_test=100,num_val=50,graph_gen=None,distribution=None):
    """Sample graphs from ``graph_gen`` and store them as train/test/validation splits.

    Every sampled graph is converted to a ``csr_matrix`` and written with
    ``save_npz`` (scipy appends the ``.npz`` extension) into
    ``../data/training/<distribution>``, ``../data/testing/<distribution>`` and
    ``../data/validation/<distribution>``. Files are named
    ``<distribution>_graph_<index zero-padded to 4 digits>``; the index restarts
    at 0 in each split.

    Args:
        num_train: number of graphs written to the training folder.
        num_test: number of graphs written to the testing folder.
        num_val: number of graphs written to the validation folder.
        graph_gen: object whose ``get()`` returns one graph in a form that
            ``csr_matrix`` accepts. Required despite the ``None`` default.
        distribution: name used for the sub-folder and the file-name prefix.
            Required despite the ``None`` default.

    Raises:
        ValueError: if ``graph_gen`` or ``distribution`` is not supplied.
    """
    # Fail before touching the disk: with the bare defaults the function used to
    # create '../data/*/None' folders and then crash on ``None.get()``.
    if graph_gen is None:
        raise ValueError("generate_dataset requires a graph generator (graph_gen)")
    if distribution is None:
        raise ValueError("generate_dataset requires a distribution name (distribution)")

    def subroutine(folder, num_instances):
        """Draw ``num_instances`` graphs and save each one into ``folder``."""
        for i in range(num_instances):
            graph = graph_gen.get()
            save_file_name = f'{distribution}_graph_{str(i).zfill(4)}'
            save_file_path = os.path.join(folder, save_file_name)
            save_npz(save_file_path, csr_matrix(graph))

    train_folder = f'../data/training/{distribution}'
    test_folder = f'../data/testing/{distribution}'
    val_folder = f'../data/validation/{distribution}'

    # All three folders are prepared before any graph is sampled.
    for folder in (train_folder, test_folder, val_folder):
        mk_dir(folder)

    # Sampling order (training, testing, validation) is kept so a seeded
    # generator produces the same split contents as before.
    subroutine(train_folder, num_train)
    subroutine(test_folder, num_test)
    subroutine(val_folder, num_val)

# Watts-Strogatz graphs on 200 vertices (k=4, p=0.15): one dataset per edge type.
# The dataset suffix is paired with the edge type exactly as in the two explicit
# calls this loop replaces: DISCRETE -> "weighted", UNIFORM -> "unweighted".
for ws_edge_type, ws_suffix in ((EdgeType.DISCRETE, "weighted"),
                                (EdgeType.UNIFORM, "unweighted")):
    ws_generator = RandomWattsStrogatzGraphGenerator(n_spins=200,
                                                     k=4,
                                                     p=0.15,
                                                     edge_type=ws_edge_type)
    generate_dataset(graph_gen=ws_generator,
                     distribution=f"WattsStrogatz_200vertices_{ws_suffix}")

# Holme-Kim counterparts, currently disabled:
# generate_dataset(graph_gen=RandomHomleKimGraphGenerator(n_spins=200,m=4,p=0.10,edge_type=EdgeType.DISCRETE),
#                  distribution="HomleKim_200vertices_weighted")

# generate_dataset(graph_gen=RandomHomleKimGraphGenerator(n_spins=200,m=4,p=0.10,edge_type=EdgeType.UNIFORM),
#                  distribution="HomleKim_200vertices_unweighted")
    

    




dir already exists:  ../data/training/WattsStrogatz_200vertices_weighted
dir already exists:  ../data/testing/WattsStrogatz_200vertices_weighted
dir already exists:  ../data/validation/WattsStrogatz_200vertices_weighted
dir already exists:  ../data/training/WattsStrogatz_200vertices_unweighted
dir already exists:  ../data/testing/WattsStrogatz_200vertices_unweighted
dir already exists:  ../data/validation/WattsStrogatz_200vertices_unweighted


# Minimum dominating set

In [None]:
# Validation graphs for the minimum dominating set data: Barabasi-Albert graphs
# (m=4) whose vertex count is drawn at random from an inclusive (min, max) range.
# Each range gets its own folder under '../data_MDS/validation'.
number_of_vertices=[(200,300),(800,1200)]  # earlier fixed sizes: [20,40,60,100,200]
distributions = ['BA']  # not read below; this cell only ever builds BA graphs
number_of_instances=50

for min_vertices, max_vertices in number_of_vertices:
    folder = f'../data_MDS/validation/BA_{min_vertices}-{max_vertices}'
    mk_dir(folder)

    # An earlier variant built the graphs with RandomBarabasiAlbertGraphGenerator
    # (m_insertion_edges=4, EdgeType.UNIFORM); networkx is called directly here.
    for idx in range(number_of_instances):
        # random.randint includes both endpoints of the range.
        vertex_count = random.randint(min_vertices, max_vertices)
        ba_graph = nx.barabasi_albert_graph(n=vertex_count, m=4)
        adjacency = csr_matrix(nx.to_numpy_array(ba_graph))
        save_npz(os.path.join(folder, f'graph_{idx:04d}'), adjacency)
        


# ER and BA 

In [None]:
# Training sets: for every graph family and size, sample `number_of_instances`
# graphs with EdgeType.DISCRETE and store each as a sparse .npz file.
number_of_vertices = [20,40,60,100,200]
distributions = ['ER','BA']
number_of_instances=4000

for distribution in distributions:
    for n in number_of_vertices:
        # Anchored at '../data' like every other cell of this notebook; the
        # previous 'data/training/...' put the training sets in a different
        # tree from the matching '../data/testing' and '../data/validation'.
        folder=f'../data/training/{distribution}_{n}'
        mk_dir(folder)

        if distribution=='ER':
            train_graph_generator = RandomErdosRenyiGraphGenerator(n_spins=n,
                                                                    p_connection=0.15,
                                                                    edge_type=EdgeType.DISCRETE)
        elif distribution=='BA':
            train_graph_generator = RandomBarabasiAlbertGraphGenerator(n_spins=n,
                                                                       m_insertion_edges=4,
                                                                       edge_type=EdgeType.DISCRETE)
        else:
            # A bare `else` used to label any unknown family as BA graphs.
            raise ValueError(f'unsupported distribution: {distribution!r}')

        for i in range(number_of_instances):
            graph=train_graph_generator.get()
            # Zero-padded index, matching the test/validation file names so the
            # files sort in generation order.
            save_file_name = f'{distribution}_{n}vertices_graph_{str(i).zfill(4)}'
            save_file_path=os.path.join(folder,save_file_name)
            save_npz(save_file_path, csr_matrix(graph))
        


In [None]:
import shutil  # only needed by this cell; pickle and pandas come from the import cell

# Re-export the pickled ER/BA graph sets as one sparse .npz file per graph and
# store the known optimal values next to the test graphs.
number_of_vertices = [20,40,60,100,200]
distributions = ['ER','BA']

for distribution in distributions:
    for n in number_of_vertices:
        test_folder=f'../data/testing/{distribution}_{n}'
        val_folder=f'../data/validation/{distribution}_{n}'

        # Start from empty folders so files from an earlier export cannot linger.
        # Guarded because rmtree raises if the folder was never created.
        for stale_folder in (test_folder, val_folder):
            if os.path.isdir(stale_folder):
                shutil.rmtree(stale_folder)
        mk_dir(test_folder)
        mk_dir(val_folder)

        if distribution=='ER':
            graph_tag=f'ER_{n}spin_p15'
        elif distribution=='BA':
            graph_tag=f'BA_{n}spin_m4'
        else:
            # Without this branch the paths of the previous iteration were reused.
            raise ValueError(f'unsupported distribution: {distribution!r}')

        # The source folders are crossed relative to the output splits: the
        # 50-graph pickle under '_graphs/testing' becomes the validation split,
        # and the 100-graph pickle under '_graphs/validation' (the set with
        # stored optima) becomes the test split.
        val_graph_save_loc=f'../_graphs/testing/{graph_tag}_50graphs.pkl'
        test_graph_save_loc=f'../_graphs/validation/{graph_tag}_100graphs.pkl'

        # pickle executes code while loading: only use on trusted, project-local files.
        with open(f'../_graphs/validation/opts/cuts_{graph_tag}_100graphs.pkl', 'rb') as opt_handle:
            opt_test=pickle.load(opt_handle)

        graphs_test = load_graph_set(test_graph_save_loc)
        graphs_val = load_graph_set(val_graph_save_loc)

        test_files=[]
        for i,graph in enumerate(graphs_test):
            save_file_name = f'{distribution}_{n}vertices_graph_{str(i).zfill(4)}'
            save_npz(os.path.join(test_folder,save_file_name), csr_matrix(graph))
            test_files.append(save_file_name)

        # One row per test instance; the two columns must have the same length.
        opt_file={'Instance':test_files,'OPT':opt_test}
        df=pd.DataFrame(opt_file)
        df.to_pickle(os.path.join(test_folder,'optimal'))

        for i,graph in enumerate(graphs_val):
            save_file_name = f'{distribution}_{n}vertices_graph_{str(i).zfill(4)}'
            save_npz(os.path.join(val_folder,save_file_name), csr_matrix(graph))
            

In [None]:
# Display the Instance/OPT table currently bound to `df`. When the cells are run
# in order this is the frame built in the last iteration of the export loop above.
df

# Physics 

In [None]:
# Training and validation graphs for the 'Physics' distribution: regular graphs
# of degree d=6 with EdgeType.DISCRETE edges, one sparse .npz file per graph.
number_of_vertices = [125]

number_of_train_instances=4000
number_of_val_instances=50

distribution='Physics'

for n in number_of_vertices:
    # The folder name does not depend on n; only the file names carry the size.
    train_folder=f'../data/training/{distribution}'
    val_folder=f'../data/validation/{distribution}'
    mk_dir(train_folder)
    mk_dir(val_folder)

    graph_generator=RandomRegularGraphGenerator(n_spins=n,d=6,edge_type=EdgeType.DISCRETE)

    # Training graphs are drawn first, then validation graphs, from the same generator.
    splits = (
        (train_folder, number_of_train_instances),
        (val_folder, number_of_val_instances),
    )
    for split_folder, split_size in splits:
        for idx in range(split_size):
            adjacency = csr_matrix(graph_generator.get())
            file_stem = f'{distribution}_{n}vertices_graph_{idx:04d}'
            save_npz(os.path.join(split_folder, file_stem), adjacency)
        


In [None]:

# Export the pickled 125-spin Ising benchmark graphs as the physics test set:
# one sparse .npz file per graph plus an 'optimal' table pairing each instance
# name with the value stored in the 'cuts_ising_125spin' pickle.
physics_save_loc='../_graphs/benchmarks/ising_125spin_graphs.pkl'
graphs_test=load_graph_set(physics_save_loc)

# NOTE(review): lower-case 'physics' here, while the training/validation cells
# write to '.../Physics' - confirm which spelling the data loaders expect.
save_folder='../data/testing/physics'
os.makedirs(save_folder,exist_ok=True)

instances=[]
for i,graph in enumerate(graphs_test):
    save_file_name = f'physics_graph_{str(i).zfill(4)}'
    save_npz(os.path.join(save_folder,save_file_name), csr_matrix(graph))
    instances.append(save_file_name)

# Close the file deterministically instead of leaking the handle.
# pickle executes code while loading: only use on trusted, project-local files.
with open('../_graphs/benchmarks/opts/cuts_ising_125spin.pkl', 'rb') as opt_handle:
    opt=pickle.load(opt_handle)

# One row per exported graph; the two columns must have the same length.
opt_file={'Instance':instances,'OPT':opt}
df=pd.DataFrame(opt_file)
df.to_pickle(os.path.join(save_folder,'optimal'))





# Gflow-net

In [None]:
# Same generation recipe as the 'Physics' cell: degree-6 regular graphs with
# EdgeType.DISCRETE edges, written to the 'Physics' training/validation folders.
number_of_vertices = [125]

number_of_train_instances=4000
number_of_val_instances=50
number_of_test_instances=100  # NOTE(review): never read below - no test graphs are generated

distribution='Physics'

for n in number_of_vertices:
    train_folder=f'../data/training/{distribution}'
    val_folder=f'../data/validation/{distribution}'
    test_folder=f'../data/testing/{distribution}'  # NOTE(review): neither created nor written to
    mk_dir(train_folder)
    mk_dir(val_folder)

    graph_generator=RandomRegularGraphGenerator(n_spins=n,d=6,edge_type=EdgeType.DISCRETE)

    # Training split first, then validation, both sampled from the same generator.
    for out_dir, how_many in [(train_folder, number_of_train_instances),
                              (val_folder, number_of_val_instances)]:
        for k in range(how_many):
            sampled = graph_generator.get()
            stem = f'{distribution}_{n}vertices_graph_{k:04d}'
            save_npz(os.path.join(out_dir, stem), csr_matrix(sampled))
        


In [None]:
# Display whatever DataFrame is currently bound to `df`.
# NOTE(review): the cell directly above does not assign `df`, so this relies on
# state left behind by an earlier cell - confirm it is still wanted.
df