In [11]:
import torch 
from torch_geometric.data import Data
from torch_geometric.utils import remove_self_loops, to_undirected, degree, coalesce
from collections import Counter 
import pickle 
import os 
from itertools import count 
from tqdm.auto import tqdm 

In [12]:
# Number of nodes in every generated graph.
num_nodes = 1_000
# Random edge samples drawn per node: each graph starts from
# num_nodes * num_neighbors sampled endpoint pairs.
num_neighbors = 5
# A graph is kept only if its count of positive labels is within this
# distance of num_nodes // 2.
tolerance = 70

In [13]:
def _sample_labeled_graph(num_nodes, num_neighbors, generator=None):
    """Draw one random undirected graph and flag the nodes that lie on a triangle.

    Args:
        num_nodes: Number of nodes in the graph.
        num_neighbors: Edge samples per node; ``num_nodes * num_neighbors``
            endpoint pairs are drawn uniformly at random before the graph is
            symmetrised, stripped of self-loops and coalesced.
        generator: Optional ``torch.Generator`` driving the edge sampling.
            ``None`` falls back to torch's global RNG.

    Returns:
        Tuple ``(edge_index, label_1d)``. ``edge_index`` has shape
        ``[2, num_edges]``; ``label_1d`` is an int64 tensor of shape
        ``[num_nodes]`` holding 1 where ``diag(A^3) > 0`` (the node closes at
        least one walk of length 3, i.e. sits on a triangle) and 0 elsewhere.
    """
    edge_index = torch.randint(
        low=0,
        high=num_nodes,
        size=[2, num_nodes * num_neighbors],
        generator=generator,
    )
    edge_index = to_undirected(edge_index=edge_index, num_nodes=num_nodes)
    edge_index, _ = remove_self_loops(edge_index=edge_index)
    edge_index = coalesce(edge_index=edge_index, num_nodes=num_nodes)

    adj_mat = torch.sparse_coo_tensor(
        indices=edge_index,
        values=torch.ones(edge_index.shape[1]),
        size=[num_nodes, num_nodes],
    ).to_dense()
    adj_mat_pow_2 = adj_mat @ adj_mat

    # Only the diagonal of A^3 is needed:
    #   diag(A^3)[i] = sum_j (A^2)[i, j] * A[j, i]
    # so an element-wise product + row sum replaces the second dense matmul.
    # All terms are non-negative, so the `> 0` test below is unaffected.
    diag_1d = (adj_mat_pow_2 * adj_mat.t()).sum(dim=1)
    assert diag_1d.shape == (num_nodes,)

    label_1d = (diag_1d > 0.).to(torch.int64)
    return edge_index, label_1d


# Fixed seed so the saved dataset can be regenerated exactly. A dedicated
# generator is used instead of torch.manual_seed to leave the global RNG alone.
seed = 0
rng = torch.Generator().manual_seed(seed)

graph_list = []

for _ in tqdm(range(3), desc='graphs'):
    # Rejection sampling: redraw until the positive/negative labels are roughly
    # balanced. NOTE: the loop is unbounded, so a tolerance that is too tight
    # for the chosen num_nodes / num_neighbors will never terminate.
    for _attempt in tqdm(count(), desc='attempts', leave=False):
        edge_index, label_1d = _sample_labeled_graph(
            num_nodes=num_nodes,
            num_neighbors=num_neighbors,
            generator=rng,
        )

        if abs(int(label_1d.sum()) - num_nodes // 2) <= tolerance:
            graph = Data(
                num_nodes=num_nodes,
                edge_index=edge_index,
                y=label_1d,
            )
            graph_list.append(graph)

            break

graph_list

  0%|          | 0/3 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

[Data(edge_index=[2, 9946], y=[1000], num_nodes=1000),
 Data(edge_index=[2, 9944], y=[1000], num_nodes=1000),
 Data(edge_index=[2, 9948], y=[1000], num_nodes=1000)]

In [14]:
# Show the class balance (label value -> node count) of each generated graph.
for graph in graph_list:
    label_counts = Counter(graph.y.tolist())
    print(label_counts)

Counter({0: 564, 1: 436})
Counter({0: 565, 1: 435})
Counter({0: 561, 1: 439})


In [15]:
# Persist the generated graphs as a single pickle file, creating the output
# directory first if it does not exist yet.
output_dir = './processed_data'
output_path = './processed_data/train_val_test_graph.pkl'

os.makedirs(output_dir, exist_ok=True)

with open(output_path, 'wb') as pkl_file:
    pickle.dump(graph_list, pkl_file)