In [1]:
import gc
import torch
import random
import numpy as np
from tqdm.auto import tqdm

from graph_nn_vae.data.adj_matrix_data_module import AdjMatrixDataModule
from graph_nn_vae.data.graph_loaders import RealGraphLoader, SyntheticGraphLoader
from graph_nn_vae.experiments.decorators import add_graphloader_args

In [2]:
# Seed the three global RNGs used in this notebook (PyTorch, Python's
# `random`, NumPy) with a fixed value so a fresh "Restart & Run All"
# starts from the same random state.
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)

In [3]:
# Shared configuration forwarded to the data modules below.
# Fractions for train / val / test, used by `save_datasets`
# (the GRID-SMALL cell passes its own [0.5, 0.25, 0.25] instead).
train_val_test_split = [0.7, 0.15, 0.15]
# Forwarded as `train_val_test_permutation_split`; the exact semantics are
# defined by AdjMatrixDataModule -- NOTE(review): presumably the share of
# graph permutations assigned to each subset; confirm in the data module.
train_val_test_permutation_split = [1, 0, 0.0]
# Forwarded as `num_dataset_graph_permutations`; rebound to 1 further down
# before the REDDIT datasets are saved.
num_dataset_graph_permutations = 10

In [4]:
# Number of pickles written per dataset: every save loop below iterates
# `range(N)` and writes `<index>.pkl`.
N = 5

In [5]:
# Passed as `datasets_dir` to RealSaver in `save_datasets`; the path is
# relative, so it is resolved against the kernel's working directory.
datasets_path = './datasets'

In [6]:
to_save_path = '/usr/local/datasets'
!mkdir $to_save_path/GRID-SMALL
!mkdir $to_save_path/IMDB-BINARY
!mkdir $to_save_path/IMDB-MULTI
!mkdir $to_save_path/COLLAB
!mkdir $to_save_path/REDDIT-BINARY
!mkdir $to_save_path/REDDIT-MULTI-5K
!mkdir $to_save_path/REDDIT-MULTI-12K

mkdir: cannot create directory ‘/usr/local/datasets/GRID-SMALL’: File exists
mkdir: cannot create directory ‘/usr/local/datasets/IMDB-BINARY’: File exists
mkdir: cannot create directory ‘/usr/local/datasets/IMDB-MULTI’: File exists
mkdir: cannot create directory ‘/usr/local/datasets/COLLAB’: File exists
mkdir: cannot create directory ‘/usr/local/datasets/REDDIT-BINARY’: File exists
mkdir: cannot create directory ‘/usr/local/datasets/REDDIT-MULTI-5K’: File exists
mkdir: cannot create directory ‘/usr/local/datasets/REDDIT-MULTI-12K’: File exists


In [7]:
# Data module bound to SyntheticGraphLoader. In this notebook it is only
# instantiated for its side effect of writing the dataset pickle given via
# `save_dataset_to_pickle`.
# NOTE(review): `add_graphloader_args` presumably exposes the loader's
# arguments (e.g. `graph_type`) on the module -- confirm in
# graph_nn_vae.experiments.decorators.
@add_graphloader_args
class SyntheticSaver(AdjMatrixDataModule):
    graphloader_class = SyntheticGraphLoader

In [8]:
# Write N pickles of the synthetic 'grid_small' graphs, one per loop index.
# The instance is discarded on purpose: constructing the data module is what
# produces the pickle (see "Dataset pickled successfully" in the output).
for i in range(N):
    SyntheticSaver(
        # what to generate and where to store it
        graph_type='grid_small',
        use_labels=False,
        save_dataset_to_pickle=f'{to_save_path}/GRID-SMALL/{i}.pkl',
        # splitting / permutation settings (this dataset uses its own split)
        train_val_test_split=[0.5, 0.25, 0.25],
        train_val_test_permutation_split=train_val_test_permutation_split,
        num_dataset_graph_permutations=num_dataset_graph_permutations,
        # preprocessing switches
        bfs=False,
        deduplicate_train=False,
        deduplicate_val_test=False,
        # loader settings
        batch_size=1,
        batch_size_val=0,
        batch_size_test=0,
        workers=0,
    )

Statistic of set:  Full original dataset
             Dataset size : 9
                   Labels : False
           Min node count : 4
       Average node count : 9.0
           Max node count : 16
           Min edge count : 4.0
       Average edge count : 12.0
           Max edge count : 24.0
     Min filling fraction : 0.2
 Average filling fraction : 0.37
     Max filling fraction : 0.67
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/GRID-SMALL/0.pkl
File size: 35.33 KB
Statistic of set:  Train dataset
             Dataset size : 43
                   Labels : False
           Min node count : 4
       Average node count : 10.05
           Max node count : 16
           Min edge count : 4.0
       Average edge count : 13.77
           Max edge count : 24.0
     Min filling fraction : 0.2
 Average filling fraction : 0.34
     Max filling fraction : 0.67
------------------------------------------------------

preparing dataset train for autoencoder:   0%|          | 0/43 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 9
                   Labels : False
           Min node count : 4
       Average node count : 9.0
           Max node count : 16
           Min edge count : 4.0
       Average edge count : 12.0
           Max edge count : 24.0
     Min filling fraction : 0.2
 Average filling fraction : 0.37
     Max filling fraction : 0.67
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/GRID-SMALL/1.pkl
File size: 35.16 KB
Statistic of set:  Train dataset
             Dataset size : 42
                   Labels : False
           Min node count : 4
       Average node count : 7.05
           Max node count : 9
           Min edge count : 4.0
       Average edge count : 8.69
           Max edge count : 12.0
     Min filling fraction : 0.33
 Average filling fraction : 0.42
     Max filling fraction : 0.67
--------------------------------------------------------

preparing dataset train for autoencoder:   0%|          | 0/42 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 9
                   Labels : False
           Min node count : 4
       Average node count : 9.0
           Max node count : 16
           Min edge count : 4.0
       Average edge count : 12.0
           Max edge count : 24.0
     Min filling fraction : 0.2
 Average filling fraction : 0.37
     Max filling fraction : 0.67
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/GRID-SMALL/2.pkl
File size: 35.33 KB
Statistic of set:  Train dataset
             Dataset size : 43
                   Labels : False
           Min node count : 4
       Average node count : 9.81
           Max node count : 12
           Min edge count : 4.0
       Average edge count : 13.3
           Max edge count : 17.0
     Min filling fraction : 0.26
 Average filling fraction : 0.33
     Max filling fraction : 0.67
-------------------------------------------------------

preparing dataset train for autoencoder:   0%|          | 0/43 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 9
                   Labels : False
           Min node count : 4
       Average node count : 9.0
           Max node count : 16
           Min edge count : 4.0
       Average edge count : 12.0
           Max edge count : 24.0
     Min filling fraction : 0.2
 Average filling fraction : 0.37
     Max filling fraction : 0.67
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/GRID-SMALL/3.pkl
File size: 35.33 KB
Statistic of set:  Train dataset
             Dataset size : 43
                   Labels : False
           Min node count : 4
       Average node count : 6.79
           Max node count : 8
           Min edge count : 4.0
       Average edge count : 8.19
           Max edge count : 10.0
     Min filling fraction : 0.36
 Average filling fraction : 0.43
     Max filling fraction : 0.67
--------------------------------------------------------

preparing dataset train for autoencoder:   0%|          | 0/43 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 9
                   Labels : False
           Min node count : 4
       Average node count : 9.0
           Max node count : 16
           Min edge count : 4.0
       Average edge count : 12.0
           Max edge count : 24.0
     Min filling fraction : 0.2
 Average filling fraction : 0.37
     Max filling fraction : 0.67
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/GRID-SMALL/4.pkl
File size: 35.16 KB
Statistic of set:  Train dataset
             Dataset size : 42
                   Labels : False
           Min node count : 4
       Average node count : 9.43
           Max node count : 12
           Min edge count : 4.0
       Average edge count : 12.74
           Max edge count : 17.0
     Min filling fraction : 0.26
 Average filling fraction : 0.35
     Max filling fraction : 0.67
------------------------------------------------------

preparing dataset train for autoencoder:   0%|          | 0/42 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/20 [00:00<?, ?it/s]

In [9]:
# Data module bound to RealGraphLoader. `save_datasets` instantiates it with
# `datasets_dir` / `dataset_name` only for the side effect of writing the
# dataset pickle given via `save_dataset_to_pickle`.
# NOTE(review): `add_graphloader_args` presumably exposes the loader's
# arguments on the module -- confirm in graph_nn_vae.experiments.decorators.
@add_graphloader_args
class RealSaver(AdjMatrixDataModule):
    graphloader_class = RealGraphLoader

In [10]:
def save_datasets(dataset_name, num_permutations=None):
    """Pickle ``N`` versions of a real-world graph dataset.

    For every index ``i`` in ``range(N)`` a ``RealSaver`` data module is
    constructed, which writes
    ``<to_save_path>/<dataset_name>/<i>.pkl`` as a side effect.

    Args:
        dataset_name: Name of the dataset below ``datasets_path``; it is also
            used as the name of the output sub-directory and as the progress
            bar label.
        num_permutations: Value forwarded as
            ``num_dataset_graph_permutations``. When ``None`` (default) the
            notebook-level ``num_dataset_graph_permutations`` is read at call
            time, which keeps the previous behaviour (including cells that
            rebind that global before calling this function).

    Returns:
        None. The function is called for its side effect of writing files.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    import os

    if num_permutations is None:
        num_permutations = num_dataset_graph_permutations

    # Create the target directory here instead of relying on the separate
    # `mkdir` cell, so a missing directory cannot make the save step fail
    # after the dataset has already been loaded.
    output_dir = to_save_path + '/' + dataset_name
    os.makedirs(output_dir, exist_ok=True)

    for i in tqdm(range(N), desc=dataset_name):
        RealSaver(
            datasets_dir=datasets_path,
            dataset_name=dataset_name,
            use_labels=True,
            max_graph_size=None,
            num_dataset_graph_permutations=num_permutations,
            train_val_test_split=train_val_test_split,
            train_val_test_permutation_split=train_val_test_permutation_split,
            save_dataset_to_pickle=output_dir + '/' + str(i) + '.pkl',
            bfs=False,
            deduplicate_train=False,
            deduplicate_val_test=False,
            batch_size=1,
            batch_size_val=0,
            batch_size_test=0,
            workers=0
        )
        # The data module is never bound to a name; collect explicitly
        # between iterations (presumably to release the previous dataset's
        # memory before the next one is loaded).
        gc.collect()

In [11]:
# Writes IMDB-BINARY/0.pkl .. 4.pkl (about 34.5 MB each in the recorded run).
save_datasets('IMDB-BINARY')

IMDB-BINARY:   0%|          | 0/5 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1000
                   Labels : True
           Min node count : 12
       Average node count : 19.77
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 96.53
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.52
     Max filling fraction : 1.0
          Label "0" count : 500
          Label "1" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-BINARY/0.pkl
File size: 34.54 MB
Statistic of set:  Train dataset
             Dataset size : 6171
                   Labels : True
           Min node count : 12
       Average node count : 19.99
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 88.1
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.46
     Max

preparing dataset train for autoencoder:   0%|          | 0/6171 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1257 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1320 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1000
                   Labels : True
           Min node count : 12
       Average node count : 19.77
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 96.53
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.52
     Max filling fraction : 1.0
          Label "0" count : 500
          Label "1" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-BINARY/1.pkl
File size: 34.54 MB
Statistic of set:  Train dataset
             Dataset size : 6091
                   Labels : True
           Min node count : 12
       Average node count : 20.14
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 86.37
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.45
     Ma

preparing dataset train for autoencoder:   0%|          | 0/6091 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1319 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1336 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1000
                   Labels : True
           Min node count : 12
       Average node count : 19.77
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 96.53
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.52
     Max filling fraction : 1.0
          Label "0" count : 500
          Label "1" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-BINARY/2.pkl
File size: 34.54 MB
Statistic of set:  Train dataset
             Dataset size : 6154
                   Labels : True
           Min node count : 12
       Average node count : 20.21
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 86.9
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.45
     Max

preparing dataset train for autoencoder:   0%|          | 0/6154 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1256 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1337 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1000
                   Labels : True
           Min node count : 12
       Average node count : 19.77
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 96.53
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.52
     Max filling fraction : 1.0
          Label "0" count : 500
          Label "1" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-BINARY/3.pkl
File size: 34.54 MB
Statistic of set:  Train dataset
             Dataset size : 6061
                   Labels : True
           Min node count : 12
       Average node count : 20.01
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 86.05
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.45
     Ma

preparing dataset train for autoencoder:   0%|          | 0/6061 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1329 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1356 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1000
                   Labels : True
           Min node count : 12
       Average node count : 19.77
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 96.53
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.52
     Max filling fraction : 1.0
          Label "0" count : 500
          Label "1" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-BINARY/4.pkl
File size: 34.54 MB
Statistic of set:  Train dataset
             Dataset size : 6136
                   Labels : True
           Min node count : 12
       Average node count : 20.11
           Max node count : 136
           Min edge count : 26.0
       Average edge count : 86.77
           Max edge count : 1249.0
     Min filling fraction : 0.1
 Average filling fraction : 0.45
     Ma

preparing dataset train for autoencoder:   0%|          | 0/6136 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1275 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1338 [00:00<?, ?it/s]

In [12]:
# Writes IMDB-MULTI/0.pkl .. 4.pkl (about 21.4 MB each in the recorded run).
save_datasets('IMDB-MULTI')

IMDB-MULTI:   0%|          | 0/5 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1500
                   Labels : True
           Min node count : 7
       Average node count : 13.0
           Max node count : 89
           Min edge count : 12.0
       Average edge count : 65.94
           Max edge count : 1467.0
     Min filling fraction : 0.13
 Average filling fraction : 0.77
     Max filling fraction : 1.0
          Label "1" count : 500
          Label "2" count : 500
          Label "3" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-MULTI/0.pkl
File size: 21.42 MB
Statistic of set:  Train dataset
             Dataset size : 5450
                   Labels : True
           Min node count : 7
       Average node count : 15.8
           Max node count : 78
           Min edge count : 12.0
       Average edge count : 72.93
           Max edge count : 982.0
     Min filling fraction : 0.13
 Average filli

preparing dataset train for autoencoder:   0%|          | 0/5450 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1235 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1187 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1500
                   Labels : True
           Min node count : 7
       Average node count : 13.0
           Max node count : 89
           Min edge count : 12.0
       Average edge count : 65.94
           Max edge count : 1467.0
     Min filling fraction : 0.13
 Average filling fraction : 0.77
     Max filling fraction : 1.0
          Label "1" count : 500
          Label "2" count : 500
          Label "3" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-MULTI/1.pkl
File size: 21.42 MB
Statistic of set:  Train dataset
             Dataset size : 5459
                   Labels : True
           Min node count : 7
       Average node count : 15.72
           Max node count : 89
           Min edge count : 12.0
       Average edge count : 72.3
           Max edge count : 1467.0
     Min filling fraction : 0.13
 Average fill

preparing dataset train for autoencoder:   0%|          | 0/5459 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1265 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1142 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1500
                   Labels : True
           Min node count : 7
       Average node count : 13.0
           Max node count : 89
           Min edge count : 12.0
       Average edge count : 65.94
           Max edge count : 1467.0
     Min filling fraction : 0.13
 Average filling fraction : 0.77
     Max filling fraction : 1.0
          Label "1" count : 500
          Label "2" count : 500
          Label "3" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-MULTI/2.pkl
File size: 21.42 MB
Statistic of set:  Train dataset
             Dataset size : 5494
                   Labels : True
           Min node count : 7
       Average node count : 16.1
           Max node count : 78
           Min edge count : 12.0
       Average edge count : 74.04
           Max edge count : 982.0
     Min filling fraction : 0.13
 Average filli

preparing dataset train for autoencoder:   0%|          | 0/5494 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1219 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1154 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1500
                   Labels : True
           Min node count : 7
       Average node count : 13.0
           Max node count : 89
           Min edge count : 12.0
       Average edge count : 65.94
           Max edge count : 1467.0
     Min filling fraction : 0.13
 Average filling fraction : 0.77
     Max filling fraction : 1.0
          Label "1" count : 500
          Label "2" count : 500
          Label "3" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-MULTI/3.pkl
File size: 21.43 MB
Statistic of set:  Train dataset
             Dataset size : 5597
                   Labels : True
           Min node count : 7
       Average node count : 16.13
           Max node count : 78
           Min edge count : 12.0
       Average edge count : 75.41
           Max edge count : 982.0
     Min filling fraction : 0.13
 Average fill

preparing dataset train for autoencoder:   0%|          | 0/5597 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1068 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1209 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 1500
                   Labels : True
           Min node count : 7
       Average node count : 13.0
           Max node count : 89
           Min edge count : 12.0
       Average edge count : 65.94
           Max edge count : 1467.0
     Min filling fraction : 0.13
 Average filling fraction : 0.77
     Max filling fraction : 1.0
          Label "1" count : 500
          Label "2" count : 500
          Label "3" count : 500
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/IMDB-MULTI/4.pkl
File size: 21.42 MB
Statistic of set:  Train dataset
             Dataset size : 5515
                   Labels : True
           Min node count : 7
       Average node count : 16.07
           Max node count : 89
           Min edge count : 12.0
       Average edge count : 79.82
           Max edge count : 1467.0
     Min filling fraction : 0.13
 Average fil

preparing dataset train for autoencoder:   0%|          | 0/5515 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1103 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1245 [00:00<?, ?it/s]

In [13]:
# Writes COLLAB/0.pkl .. 4.pkl. Heavy cell: each pickle was about 3.4 GB in
# the recorded run.
save_datasets('COLLAB')

COLLAB:   0%|          | 0/5 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 5000
                   Labels : True
           Min node count : 32
       Average node count : 74.49
           Max node count : 492
           Min edge count : 60.0
       Average edge count : 2457.5
           Max edge count : 40119.5
     Min filling fraction : 0.03
 Average filling fraction : 0.51
     Max filling fraction : 1.0
          Label "1" count : 2600
          Label "2" count : 775
          Label "3" count : 1625
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/COLLAB/0.pkl
File size: 3.4 GB
Statistic of set:  Train dataset
             Dataset size : 31785
                   Labels : True
           Min node count : 32
       Average node count : 76.55
           Max node count : 487
           Min edge count : 61.0
       Average edge count : 2525.05
           Max edge count : 40119.5
     Min filling fraction : 0.03
 Aver

preparing dataset train for autoencoder:   0%|          | 0/31785 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/6828 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/6632 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 5000
                   Labels : True
           Min node count : 32
       Average node count : 74.49
           Max node count : 492
           Min edge count : 60.0
       Average edge count : 2457.5
           Max edge count : 40119.5
     Min filling fraction : 0.03
 Average filling fraction : 0.51
     Max filling fraction : 1.0
          Label "1" count : 2600
          Label "2" count : 775
          Label "3" count : 1625
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/COLLAB/1.pkl
File size: 3.4 GB
Statistic of set:  Train dataset
             Dataset size : 31734
                   Labels : True
           Min node count : 32
       Average node count : 77.14
           Max node count : 492
           Min edge count : 60.0
       Average edge count : 2569.83
           Max edge count : 40119.5
     Min filling fraction : 0.05
 Aver

preparing dataset train for autoencoder:   0%|          | 0/31734 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/6771 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/6739 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 5000
                   Labels : True
           Min node count : 32
       Average node count : 74.49
           Max node count : 492
           Min edge count : 60.0
       Average edge count : 2457.5
           Max edge count : 40119.5
     Min filling fraction : 0.03
 Average filling fraction : 0.51
     Max filling fraction : 1.0
          Label "1" count : 2600
          Label "2" count : 775
          Label "3" count : 1625
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/COLLAB/2.pkl
File size: 3.4 GB
Statistic of set:  Train dataset
             Dataset size : 31648
                   Labels : True
           Min node count : 32
       Average node count : 77.05
           Max node count : 487
           Min edge count : 60.0
       Average edge count : 2549.15
           Max edge count : 40119.5
     Min filling fraction : 0.03
 Aver

preparing dataset train for autoencoder:   0%|          | 0/31648 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/6780 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/6796 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 5000
                   Labels : True
           Min node count : 32
       Average node count : 74.49
           Max node count : 492
           Min edge count : 60.0
       Average edge count : 2457.5
           Max edge count : 40119.5
     Min filling fraction : 0.03
 Average filling fraction : 0.51
     Max filling fraction : 1.0
          Label "1" count : 2600
          Label "2" count : 775
          Label "3" count : 1625
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/COLLAB/3.pkl
File size: 3.4 GB
Statistic of set:  Train dataset
             Dataset size : 31659
                   Labels : True
           Min node count : 32
       Average node count : 77.01
           Max node count : 483
           Min edge count : 60.0
       Average edge count : 2632.52
           Max edge count : 40008.0
     Min filling fraction : 0.03
 Aver

preparing dataset train for autoencoder:   0%|          | 0/31659 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/6792 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/6777 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 5000
                   Labels : True
           Min node count : 32
       Average node count : 74.49
           Max node count : 492
           Min edge count : 60.0
       Average edge count : 2457.5
           Max edge count : 40119.5
     Min filling fraction : 0.03
 Average filling fraction : 0.51
     Max filling fraction : 1.0
          Label "1" count : 2600
          Label "2" count : 775
          Label "3" count : 1625
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/COLLAB/4.pkl
File size: 3.4 GB
Statistic of set:  Train dataset
             Dataset size : 31729
                   Labels : True
           Min node count : 32
       Average node count : 77.21
           Max node count : 492
           Min edge count : 60.0
       Average edge count : 2598.58
           Max edge count : 40119.5
     Min filling fraction : 0.05
 Aver

preparing dataset train for autoencoder:   0%|          | 0/31729 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/6600 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/6908 [00:00<?, ?it/s]

In [14]:
# Rebinds the notebook-level settings that `save_datasets` reads at call
# time: every `save_datasets(...)` call executed after this cell uses a
# single permutation per graph instead of 10. The permutation split is
# re-assigned to the same value it already had.
# NOTE: this is order-dependent state -- re-running an earlier
# `save_datasets` cell after this one would silently use these values.
train_val_test_permutation_split = [1, 0, 0.0]
num_dataset_graph_permutations = 1

In [15]:
# Writes REDDIT-BINARY/<index>.pkl using the single-permutation settings set
# in the previous cell. Heavy cell: each pickle was about 7.3 GB in the
# recorded run.
save_datasets('REDDIT-BINARY')

REDDIT-BINARY:   0%|          | 0/5 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 2000
                   Labels : True
           Min node count : 6
       Average node count : 429.63
           Max node count : 3782
           Min edge count : 4.0
       Average edge count : 497.75
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 0.29
         Label "-1" count : 1000
          Label "1" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-BINARY/0.pkl
File size: 7.32 GB
Statistic of set:  Train dataset
             Dataset size : 1400
                   Labels : True
           Min node count : 6
       Average node count : 429.6
           Max node count : 3782
           Min edge count : 4.0
       Average edge count : 497.08
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
  

preparing dataset train for autoencoder:   0%|          | 0/1400 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 2000
                   Labels : True
           Min node count : 6
       Average node count : 429.63
           Max node count : 3782
           Min edge count : 4.0
       Average edge count : 497.75
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 0.29
         Label "-1" count : 1000
          Label "1" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-BINARY/1.pkl
File size: 7.32 GB
Statistic of set:  Train dataset
             Dataset size : 1400
                   Labels : True
           Min node count : 6
       Average node count : 410.23
           Max node count : 3782
           Min edge count : 4.0
       Average edge count : 476.39
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
 

preparing dataset train for autoencoder:   0%|          | 0/1400 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 2000
                   Labels : True
           Min node count : 6
       Average node count : 429.63
           Max node count : 3782
           Min edge count : 4.0
       Average edge count : 497.75
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 0.29
         Label "-1" count : 1000
          Label "1" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-BINARY/2.pkl
File size: 7.32 GB
Statistic of set:  Train dataset
             Dataset size : 1400
                   Labels : True
           Min node count : 9
       Average node count : 441.78
           Max node count : 3782
           Min edge count : 8.0
       Average edge count : 512.24
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
 

preparing dataset train for autoencoder:   0%|          | 0/1400 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 2000
                   Labels : True
           Min node count : 6
       Average node count : 429.63
           Max node count : 3782
           Min edge count : 4.0
       Average edge count : 497.75
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 0.29
         Label "-1" count : 1000
          Label "1" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-BINARY/3.pkl
File size: 7.32 GB
Statistic of set:  Train dataset
             Dataset size : 1400
                   Labels : True
           Min node count : 7
       Average node count : 432.7
           Max node count : 3782
           Min edge count : 6.0
       Average edge count : 500.22
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
  

preparing dataset train for autoencoder:   0%|          | 0/1400 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 2000
                   Labels : True
           Min node count : 6
       Average node count : 429.63
           Max node count : 3782
           Min edge count : 4.0
       Average edge count : 497.75
           Max edge count : 4071.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 0.29
         Label "-1" count : 1000
          Label "1" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-BINARY/4.pkl
File size: 7.32 GB
Statistic of set:  Train dataset
             Dataset size : 1400
                   Labels : True
           Min node count : 6
       Average node count : 437.41
           Max node count : 3648
           Min edge count : 4.0
       Average edge count : 506.97
           Max edge count : 4006.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
 

preparing dataset train for autoencoder:   0%|          | 0/1400 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/300 [00:00<?, ?it/s]

In [16]:
# Generate and pickle the REDDIT-MULTI-5K dataset via save_datasets
# (helper defined outside this excerpt).
# Per the recorded output below: 5 runs, each writing
# /usr/local/datasets/REDDIT-MULTI-5K/<run>.pkl (0.pkl .. 4.pkl) at ~17.26 GB per
# file (~86 GB total), from 4999 graphs split into 3501 train / 749 val / 749 test.
# NOTE(review): the full-dataset statistics are identical across runs while the
# train-set statistics differ, so each run presumably draws a different split — confirm
# against save_datasets.
save_datasets('REDDIT-MULTI-5K')

REDDIT-MULTI-5K:   0%|          | 0/5 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 4999
                   Labels : True
           Min node count : 22
       Average node count : 508.52
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 594.87
           Max edge count : 4783.0
     Min filling fraction : 0.0
 Average filling fraction : 0.01
     Max filling fraction : 0.09
          Label "1" count : 1000
          Label "2" count : 1000
          Label "3" count : 1000
          Label "4" count : 1000
          Label "5" count : 999
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-5K/0.pkl
File size: 17.26 GB
Statistic of set:  Train dataset
             Dataset size : 3501
                   Labels : True
           Min node count : 22
       Average node count : 510.55
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 598

preparing dataset train for autoencoder:   0%|          | 0/3501 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 4999
                   Labels : True
           Min node count : 22
       Average node count : 508.52
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 594.87
           Max edge count : 4783.0
     Min filling fraction : 0.0
 Average filling fraction : 0.01
     Max filling fraction : 0.09
          Label "1" count : 1000
          Label "2" count : 1000
          Label "3" count : 1000
          Label "4" count : 1000
          Label "5" count : 999
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-5K/1.pkl
File size: 17.26 GB
Statistic of set:  Train dataset
             Dataset size : 3501
                   Labels : True
           Min node count : 22
       Average node count : 514.5
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 602.

preparing dataset train for autoencoder:   0%|          | 0/3501 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 4999
                   Labels : True
           Min node count : 22
       Average node count : 508.52
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 594.87
           Max edge count : 4783.0
     Min filling fraction : 0.0
 Average filling fraction : 0.01
     Max filling fraction : 0.09
          Label "1" count : 1000
          Label "2" count : 1000
          Label "3" count : 1000
          Label "4" count : 1000
          Label "5" count : 999
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-5K/2.pkl
File size: 17.26 GB
Statistic of set:  Train dataset
             Dataset size : 3501
                   Labels : True
           Min node count : 33
       Average node count : 513.99
           Max node count : 3648
           Min edge count : 31.0
       Average edge count : 600

preparing dataset train for autoencoder:   0%|          | 0/3501 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 4999
                   Labels : True
           Min node count : 22
       Average node count : 508.52
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 594.87
           Max edge count : 4783.0
     Min filling fraction : 0.0
 Average filling fraction : 0.01
     Max filling fraction : 0.09
          Label "1" count : 1000
          Label "2" count : 1000
          Label "3" count : 1000
          Label "4" count : 1000
          Label "5" count : 999
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-5K/3.pkl
File size: 17.26 GB
Statistic of set:  Train dataset
             Dataset size : 3501
                   Labels : True
           Min node count : 22
       Average node count : 508.18
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 593

preparing dataset train for autoencoder:   0%|          | 0/3501 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 4999
                   Labels : True
           Min node count : 22
       Average node count : 508.52
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 594.87
           Max edge count : 4783.0
     Min filling fraction : 0.0
 Average filling fraction : 0.01
     Max filling fraction : 0.09
          Label "1" count : 1000
          Label "2" count : 1000
          Label "3" count : 1000
          Label "4" count : 1000
          Label "5" count : 999
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-5K/4.pkl
File size: 17.26 GB
Statistic of set:  Train dataset
             Dataset size : 3501
                   Labels : True
           Min node count : 22
       Average node count : 508.65
           Max node count : 3648
           Min edge count : 21.0
       Average edge count : 595

preparing dataset train for autoencoder:   0%|          | 0/3501 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/749 [00:00<?, ?it/s]

In [17]:
# Generate and pickle the REDDIT-MULTI-12K dataset via save_datasets
# (helper defined outside this excerpt).
# Per the recorded output below: 5 runs, each writing
# /usr/local/datasets/REDDIT-MULTI-12K/<run>.pkl (0.pkl .. 4.pkl) at ~29.95 GB per
# file (~150 GB total), from 11929 graphs (11 labels) split into
# 8351 train / 1789 val / 1789 test.
# NOTE(review): this is the largest dataset written by the notebook; make sure the
# target volume has enough free space before re-running.
save_datasets('REDDIT-MULTI-12K')

REDDIT-MULTI-12K:   0%|          | 0/5 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 11929
                   Labels : True
           Min node count : 2
       Average node count : 391.41
           Max node count : 3782
           Min edge count : 1.0
       Average edge count : 456.89
           Max edge count : 5171.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 1.0
          Label "1" count : 767
          Label "2" count : 1094
          Label "3" count : 902
          Label "4" count : 1205
          Label "5" count : 513
          Label "6" count : 999
          Label "7" count : 1243
          Label "8" count : 1092
          Label "9" count : 522
         Label "10" count : 2592
         Label "11" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-12K/0.pkl
File size: 29.95 GB
Statistic of set:  Train dataset
             Dataset size : 8351
   

preparing dataset train for autoencoder:   0%|          | 0/8351 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 11929
                   Labels : True
           Min node count : 2
       Average node count : 391.41
           Max node count : 3782
           Min edge count : 1.0
       Average edge count : 456.89
           Max edge count : 5171.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 1.0
          Label "1" count : 767
          Label "2" count : 1094
          Label "3" count : 902
          Label "4" count : 1205
          Label "5" count : 513
          Label "6" count : 999
          Label "7" count : 1243
          Label "8" count : 1092
          Label "9" count : 522
         Label "10" count : 2592
         Label "11" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-12K/1.pkl
File size: 29.95 GB
Statistic of set:  Train dataset
             Dataset size : 8351
   

preparing dataset train for autoencoder:   0%|          | 0/8351 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 11929
                   Labels : True
           Min node count : 2
       Average node count : 391.41
           Max node count : 3782
           Min edge count : 1.0
       Average edge count : 456.89
           Max edge count : 5171.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 1.0
          Label "1" count : 767
          Label "2" count : 1094
          Label "3" count : 902
          Label "4" count : 1205
          Label "5" count : 513
          Label "6" count : 999
          Label "7" count : 1243
          Label "8" count : 1092
          Label "9" count : 522
         Label "10" count : 2592
         Label "11" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-12K/2.pkl
File size: 29.95 GB
Statistic of set:  Train dataset
             Dataset size : 8351
   

preparing dataset train for autoencoder:   0%|          | 0/8351 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 11929
                   Labels : True
           Min node count : 2
       Average node count : 391.41
           Max node count : 3782
           Min edge count : 1.0
       Average edge count : 456.89
           Max edge count : 5171.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 1.0
          Label "1" count : 767
          Label "2" count : 1094
          Label "3" count : 902
          Label "4" count : 1205
          Label "5" count : 513
          Label "6" count : 999
          Label "7" count : 1243
          Label "8" count : 1092
          Label "9" count : 522
         Label "10" count : 2592
         Label "11" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-12K/3.pkl
File size: 29.95 GB
Statistic of set:  Train dataset
             Dataset size : 8351
   

preparing dataset train for autoencoder:   0%|          | 0/8351 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

reading edges: 0it [00:00, ?it/s]

Statistic of set:  Full original dataset
             Dataset size : 11929
                   Labels : True
           Min node count : 2
       Average node count : 391.41
           Max node count : 3782
           Min edge count : 1.0
       Average edge count : 456.89
           Max edge count : 5171.0
     Min filling fraction : 0.0
 Average filling fraction : 0.02
     Max filling fraction : 1.0
          Label "1" count : 767
          Label "2" count : 1094
          Label "3" count : 902
          Label "4" count : 1205
          Label "5" count : 513
          Label "6" count : 999
          Label "7" count : 1243
          Label "8" count : 1092
          Label "9" count : 522
         Label "10" count : 2592
         Label "11" count : 1000
----------------------------------------------------------------
Dataset pickled successfully
File path: /usr/local/datasets/REDDIT-MULTI-12K/4.pkl
File size: 29.95 GB
Statistic of set:  Train dataset
             Dataset size : 8351
   

preparing dataset train for autoencoder:   0%|          | 0/8351 [00:00<?, ?it/s]

preparing dataset val 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]

preparing dataset test 0 for autoencoder:   0%|          | 0/1789 [00:00<?, ?it/s]