In [1]:
from pathlib import Path

import pandas as pd
import networkx as nx

from src.datasets import DATASETS, load_edgelist_txt

In [2]:
# Load every dataset as a simple undirected graph and tabulate basic statistics.
#
# Results left in the notebook namespace:
#   graphs     -- dict mapping dataset name -> cleaned nx.Graph (self-loops removed)
#   summary_df -- one row per dataset, sorted by node count, largest first
graphs = {}
rows = []

for name, rel_path in DATASETS.items():
    path = Path(rel_path)
    G = load_edgelist_txt(path, directed=False)
    G = nx.Graph(G)  # ensure undirected
    G.remove_edges_from(nx.selfloop_edges(G))  # drop self-loops before counting edges

    graphs[name] = G

    n = G.number_of_nodes()
    m = G.number_of_edges()
    # Every undirected edge adds one to the degree of two nodes, hence 2m / n.
    avg_degree = (2 * m / n) if n > 0 else 0.0

    # Count the components once and derive connectivity from the count rather
    # than traversing the graph a second time with nx.is_connected. An empty
    # graph yields 0 components, so it is reported as not connected without
    # needing a separate n > 0 guard.
    num_components = nx.number_connected_components(G)

    rows.append({
        "dataset": name,
        "nodes": n,
        "edges": m,
        "avg_degree": avg_degree,
        "is_connected": num_components == 1,
        "num_components": num_components,
    })

summary_df = pd.DataFrame(rows).sort_values("nodes", ascending=False).reset_index(drop=True)
summary_df

Unnamed: 0,dataset,nodes,edges,avg_degree,is_connected,num_components
0,Yeast,2375,11693,9.846737,True,1
1,Email,1133,5451,9.622242,True,1
2,NS_GC,379,914,4.823219,True,1
3,USAir,332,2126,12.807229,True,1
4,Celegans,297,2148,14.464646,True,1
5,Jazz,198,2742,27.69697,True,1
