In [35]:
# Install pecanpy if needed:
# pip install pecanpy

import networkx as nx
import pecanpy
from pecanpy.pecanpy import SparseOTF
# from pecanpy import SparseOTF

import importlib
import sys

import torch

import os
import pickle


In [36]:
# 1. Load Cora as a NetworkX graph (assuming you have edge_index from PyG)
# device = utils.set_seeds_and_device() 
# dataset,data = training.load_dataset('Cora', "../training_data/datasets")

In [37]:
def create_erdos_renyi_graphs(sizes, p=0.1, seed=None, save_dir_graphs="./er_graphs", save_dir_data="./er_data"):
    """Build, or reload from disk, one Erdos-Renyi graph and one Data object per size.

    For every ``n`` in ``sizes`` two cache files are consulted:
    ``<save_dir_graphs>/er_graph_<n>.gpickle`` (pickled NetworkX graph) and
    ``<save_dir_data>/er_graph_<n>.pt`` (object stored with ``torch.save``).
    A file that exists is loaded; a file that is missing is generated and written.

    Args:
        sizes: Iterable of node counts; one graph is produced per entry.
        p: Edge probability forwarded to ``nx.erdos_renyi_graph``.
        seed: Seed forwarded to ``nx.erdos_renyi_graph``.
        save_dir_graphs: Directory for the pickled graphs (created if absent).
        save_dir_data: Directory for the saved Data objects (created if absent).

    Returns:
        A ``(graphs, data_objects)`` tuple of two lists, both in the order of ``sizes``.

    NOTE(review): the cache file names depend only on ``n``, so a call with a
    different ``p`` or ``seed`` reloads whatever was cached earlier for that size.
    NOTE(review): ``utils`` and ``from_networkx`` are not imported in the visible
    cells -- confirm they are in scope before relying on the cache-miss branches.
    NOTE(review): ``pickle.load`` and ``torch.load(weights_only=False)`` can execute
    arbitrary code; only point this at cache directories you trust.
    """
    for cache_dir in (save_dir_graphs, save_dir_data):
        os.makedirs(cache_dir, exist_ok=True)

    nx_collection, data_collection = [], []

    for n in sizes:
        graph_path = os.path.join(save_dir_graphs, f"er_graph_{n}.gpickle")
        data_path = os.path.join(save_dir_data, f"er_graph_{n}.pt")

        # Graph: generate and cache on a miss, otherwise unpickle the cached copy.
        if not os.path.exists(graph_path):
            graph = nx.erdos_renyi_graph(n=n, p=p, seed=seed)
            graph = utils.add_louvain_community_labels(graph)
            with open(graph_path, "wb") as sink:
                pickle.dump(graph, sink)
            print(f"Saved new graph to {graph_path}")
        else:
            with open(graph_path, "rb") as source:
                graph = pickle.load(source)
            print(f"Successfully loaded from {graph_path}")
        nx_collection.append(graph)

        # Data object: same load-or-create policy, derived from the graph above.
        if not os.path.exists(data_path):
            converted = utils.create_masks(from_networkx(graph))
            torch.save(converted, data_path)
            print(f"Saved new Data to {data_path}")
        else:
            converted = torch.load(data_path, weights_only=False)
            print(f"Successfully loaded Data from {data_path}")
        data_collection.append(converted)

    return nx_collection, data_collection

# Usage: build (or reload from the on-disk cache) one graph per requested node count.
sizes = [10, 100, 500, 1000]  # larger sizes (5000, 10000) are currently disabled
graphs_nx, graphs_data = create_erdos_renyi_graphs(sizes, p=0.1)

# Sanity check: node count of every graph, then the matching Data objects.
print([graph.number_of_nodes() for graph in graphs_nx])
print(graphs_data)

Successfully loaded from ./er_graphs\er_graph_10.gpickle
Successfully loaded Data from ./er_data\er_graph_10.pt
Successfully loaded from ./er_graphs\er_graph_100.gpickle
Successfully loaded Data from ./er_data\er_graph_100.pt
Successfully loaded from ./er_graphs\er_graph_500.gpickle
Successfully loaded Data from ./er_data\er_graph_500.pt
Successfully loaded from ./er_graphs\er_graph_1000.gpickle
Successfully loaded Data from ./er_data\er_graph_1000.pt
[10, 100, 500, 1000]
[Data(edge_index=[2, 18], y=[10], num_nodes=10, train_mask=[10], test_mask=[10]), Data(edge_index=[2, 940], y=[100], num_nodes=100, train_mask=[100], test_mask=[100]), Data(edge_index=[2, 25150], y=[500], num_nodes=500, train_mask=[500], test_mask=[500]), Data(edge_index=[2, 100098], y=[1000], num_nodes=1000, train_mask=[1000], test_mask=[1000])]


In [38]:
# Write every Data object in graphs_data to disk as a tab-separated edge list.
for idx, data in enumerate(graphs_data):
    # Pull the edge index onto the CPU and pair its two rows up entry by entry.
    edge_index = data.edge_index.cpu().numpy()
    edges = list(zip(*edge_index))

    # Rebuild an undirected NetworkX graph from those pairs.
    # NOTE(review): only nodes that appear in at least one edge are added here.
    G = nx.Graph()
    G.add_edges_from(edges)

    # One .edg file per graph, named by its position in graphs_data; no header, no edge data.
    edgelist_path = f"graph_{idx}.edg"
    nx.write_edgelist(G, edgelist_path, delimiter='\t', data=False)
    print(f"Saved {edgelist_path}")

Saved graph_0.edg
Saved graph_1.edg
Saved graph_2.edg
Saved graph_3.edg


In [41]:
from gensim.models import Word2Vec
# Create the PecanPy walker in SparseOTF mode and load the fourth exported edge list
# as an unweighted, undirected graph.
g = SparseOTF(p=1, q=1, workers=1, verbose=False)
g.read_edg("graph_3.edg", weighted=False, directed=False)

# Sample random walks: 10 walks of length 80 are requested.
walks = g.simulate_walks(num_walks=10, walk_length=80)

# Train Word2Vec on the walks to obtain 8-dimensional embeddings.
# NOTE(review): no seed is passed, so embeddings may differ between runs -- confirm this is acceptable.
w2v_model = Word2Vec(
    walks,
    vector_size=8,
    window=3,
    min_count=0,
    sg=1,
    workers=1,
    epochs=1,
)