In [None]:
cd ..

In [None]:
import gc
import torch
import random
import numpy as np
from tqdm.auto import tqdm

from rga.data.adj_matrix_data_module import AdjMatrixDataModule
from rga.data.graph_loaders import RealGraphLoader, SyntheticGraphLoader
from rga.experiments.decorators import add_graphloader_args

In [None]:
# Seed every random number generator this notebook imports with one shared
# value. numpy is seeded last on purpose: it returns None, so the cell does
# not display the Generator object that torch.manual_seed returns.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

In [None]:
# Forwarded unchanged to both saver classes as `train_val_test_split`.
# Judging by the name: train / validation / test fractions, in that order.
train_val_test_split = [0.7, 0.15, 0.15]
# Forwarded as `train_val_test_permutation_split`.
# NOTE(review): presumably the share of graph permutations given to each
# split (everything to train here) — confirm in AdjMatrixDataModule.
train_val_test_permutation_split = [1, 0, 0.0]
# Read by save_datasets() at call time; a later cell rebinds it to 1 before
# the REDDIT datasets are processed.
num_dataset_graph_permutations = 10

In [None]:
# Number of pickle files (named 0.pkl .. N-1.pkl) written per dataset by the
# saving loops in this notebook.
N = 5

In [None]:
# Passed to RealSaver as `datasets_dir`. Relative path, so it is resolved
# against the kernel's current working directory.
datasets_path = './datasets'

In [None]:
to_save_path = '/usr/local/datasets'
!mkdir $to_save_path/GRID-MEDIUM
!mkdir $to_save_path/IMDB-BINARY
!mkdir $to_save_path/IMDB-MULTI
!mkdir $to_save_path/COLLAB
!mkdir $to_save_path/REDDIT-BINARY
!mkdir $to_save_path/REDDIT-MULTI-5K
!mkdir $to_save_path/REDDIT-MULTI-12K

In [None]:
@add_graphloader_args
class SyntheticSaver(AdjMatrixDataModule):
    """``AdjMatrixDataModule`` subclass bound to ``SyntheticGraphLoader``.

    This notebook only constructs instances (with ``save_dataset_to_pickle``
    set) and discards them.

    NOTE(review): ``add_graphloader_args`` presumably merges the loader's
    arguments into this class's constructor/argument parser — confirm in
    ``rga.experiments.decorators``.
    """

    # Loader class associated with this data module.
    graphloader_class = SyntheticGraphLoader

In [None]:
# Settings shared by every GRID-MEDIUM copy; only the output file differs
# between iterations. The permutation count is fixed at 100 here rather than
# taken from the notebook-level `num_dataset_graph_permutations`.
grid_medium_kwargs = dict(
    graph_type='grid_medium',
    num_dataset_graph_permutations=100,
    train_val_test_split=train_val_test_split,
    train_val_test_permutation_split=train_val_test_permutation_split,
    use_labels=False,
    bfs=False,
    deduplicate_train=False,
    deduplicate_val_test=False,
    batch_size=1,
    batch_size_val=0,
    batch_size_test=0,
    workers=0,
)

# Build N independent copies; each instance is created only for the pickle
# path it is given and is discarded immediately afterwards.
for copy_idx in range(N):
    pickle_path = '{}/GRID-MEDIUM/{}.pkl'.format(to_save_path, copy_idx)
    SyntheticSaver(save_dataset_to_pickle=pickle_path, **grid_medium_kwargs)

In [None]:
@add_graphloader_args
class RealSaver(AdjMatrixDataModule):
    """``AdjMatrixDataModule`` subclass bound to ``RealGraphLoader``.

    Constructed by ``save_datasets`` with ``datasets_dir``, ``dataset_name``
    and ``save_dataset_to_pickle``; the instances are not kept.

    NOTE(review): ``add_graphloader_args`` presumably merges the loader's
    arguments into this class's constructor/argument parser — confirm in
    ``rga.experiments.decorators``.
    """

    # Loader class associated with this data module.
    graphloader_class = RealGraphLoader

In [None]:
def save_datasets(dataset_name, num_permutations=None):
    """Build ``N`` ``RealSaver`` instances for one dataset, one pickle path each.

    For every ``i`` in ``range(N)`` a ``RealSaver`` is constructed with
    ``save_dataset_to_pickle`` set to ``<to_save_path>/<dataset_name>/<i>.pkl``
    and then discarded, followed by an explicit ``gc.collect()``.
    NOTE(review): the pickle is presumably written as a side effect of
    constructing the data module — confirm in ``AdjMatrixDataModule``.

    Args:
        dataset_name: Forwarded to ``RealSaver`` as ``dataset_name`` and used
            as the sub-directory name under ``to_save_path`` and as the
            progress-bar label.
        num_permutations: Value forwarded as ``num_dataset_graph_permutations``.
            When ``None`` (the default) the notebook-level global
            ``num_dataset_graph_permutations`` is read at call time, which is
            exactly what the function did before this parameter existed.
            Passing it explicitly avoids depending on which cell last rebound
            that global.
    """
    if num_permutations is None:
        # Late lookup keeps existing call sites (and the later global
        # override to 1) working unchanged.
        num_permutations = num_dataset_graph_permutations

    for i in tqdm(range(N), desc=dataset_name):
        RealSaver(
            datasets_dir=datasets_path,
            dataset_name=dataset_name,
            use_labels=True,
            max_graph_size=None,
            num_dataset_graph_permutations=num_permutations,
            train_val_test_split=train_val_test_split,
            train_val_test_permutation_split=train_val_test_permutation_split,
            save_dataset_to_pickle=to_save_path+'/'+dataset_name+'/'+str(i)+'.pkl',
            bfs=False,
            deduplicate_train=False,
            deduplicate_val_test=False,
            batch_size=1,
            batch_size_val=0,
            batch_size_test=0,
            workers=0
        )
        # Release the just-discarded data module before building the next copy.
        gc.collect()

In [None]:
# N copies of IMDB-BINARY, using the current num_dataset_graph_permutations.
save_datasets('IMDB-BINARY')

In [None]:
# N copies of IMDB-MULTI, using the current num_dataset_graph_permutations.
save_datasets('IMDB-MULTI')

In [None]:
# N copies of COLLAB, using the current num_dataset_graph_permutations.
save_datasets('COLLAB')

In [None]:
# Rebinds the global that save_datasets() reads at call time, so every
# save_datasets() call executed after this cell uses 1 instead of 10.
# Order-dependent: re-running an earlier save_datasets cell after this one
# also picks up the value 1.
num_dataset_graph_permutations = 1

In [None]:
# N copies of REDDIT-BINARY; relies on num_dataset_graph_permutations having
# been rebound to 1 by the preceding cell.
save_datasets('REDDIT-BINARY')

In [None]:
# N copies of REDDIT-MULTI-5K; relies on num_dataset_graph_permutations
# having been rebound to 1 earlier in the notebook.
save_datasets('REDDIT-MULTI-5K')

In [None]:
# N copies of REDDIT-MULTI-12K; relies on num_dataset_graph_permutations
# having been rebound to 1 earlier in the notebook.
save_datasets('REDDIT-MULTI-12K')