In [None]:
!pip -q install networkit pandas numpy psutil

import gc, time, os
import numpy as np
import pandas as pd
import psutil
import networkit as nk

# Record the library version next to the results so runs can be compared.
print("NetworKit:", nk.__version__)

# Use at most two worker threads, or fewer if the runtime offers fewer.
thread_cap = min(nk.getMaxNumberOfThreads(), 2)
nk.setNumberOfThreads(thread_cap)

# Fixed seed for the randomized generators/algorithms below.
# NOTE(review): the second argument is presumably `useThreadId` — confirm
# against the NetworKit docs.
nk.setSeed(7, False)

def ram_gb():
    """Return the resident set size (RSS) of the current process in GiB."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / (1024**3)

def tic():
    """Return a start timestamp (seconds, from ``time.perf_counter``) for ``toc``."""
    started_at = time.perf_counter()
    return started_at

def toc(t0, msg):
    """Print ``msg`` with the seconds elapsed since ``t0`` and the current RAM use.

    Parameters:
        t0: start timestamp as returned by ``tic()``.
        msg: label printed in front of the timing.
    """
    # Take the elapsed time first so the RAM lookup is not counted in it.
    elapsed = time.perf_counter() - t0
    print(f"{msg}: {elapsed:.3f}s | RAM~{ram_gb():.2f} GB")

def report(G, name):
    """Print a one-line summary of graph ``G`` tagged with ``name``.

    The line shows node and edge counts (with thousands separators) and
    whether the graph is directed and weighted. It is preceded by a blank line.
    """
    n_nodes = G.numberOfNodes()
    n_edges = G.numberOfEdges()
    counts = f"nodes={n_nodes:,} edges={n_edges:,}"
    flags = f"directed={G.isDirected()} weighted={G.isWeighted()}"
    print(f"\n[{name}] {counts} {flags}")

def force_cleanup():
    """Run a full garbage-collection pass; returns nothing."""
    # Called between pipeline stages in this notebook.
    gc.collect()
    return None

PRESET = "LARGE"

# Per-preset values that actually differ: (node count, approx-betweenness epsilon).
_PRESET_SIZES = {
    "LARGE": (120_000, 0.12),
    "XL": (250_000, 0.15),
}

# Any other preset name falls back to the smallest configuration.
N, AB_EPS = _PRESET_SIZES.get(PRESET, (80_000, 0.10))

# These two settings are the same for every preset.
M_ATTACH = 6
ED_RATIO = 0.9

print(f"\nPreset={PRESET} | N={N:,} | m={M_ATTACH} | approx-betweenness epsilon={AB_EPS}")

In [None]:
# Generate a Barabasi-Albert graph from the preset parameters.
t0 = tic()
G = nk.generators.BarabasiAlbertGenerator(M_ATTACH, N).generate()
toc(t0, "Generated BA graph")
report(G, "G")

# Count connected components; the later stages work on a single component.
t0 = tic()
cc = nk.components.ConnectedComponents(G)
cc.run()
toc(t0, "ConnectedComponents")
print("components:", cc.numberOfComponents())

if cc.numberOfComponents() > 1:
    t0 = tic()
    # extractLargestConnectedComponent is a static method of
    # ConnectedComponents, not a graphtools function. The second positional
    # argument (compactGraph=True) asks for contiguous node ids 0..n-1.
    G = nk.components.ConnectedComponents.extractLargestConnectedComponent(G, True)
    toc(t0, "Extracted LCC (compactGraph=True)")
    report(G, "LCC")

force_cleanup()

In [None]:
# --- k-core decomposition ---
t0 = tic()
core = nk.centrality.CoreDecomposition(G).run()
toc(t0, "CoreDecomposition")
core_vals = np.array(core.scores(), dtype=np.int32)
print("degeneracy (max core):", int(core_vals.max()))
core_summary = pd.Series(core_vals).describe(percentiles=[0.5, 0.9, 0.99])
print("core stats:", core_summary.to_dict())

# Backbone = nodes whose core number reaches the 97th percentile.
# In the recorded run every node had core number 6, so this threshold kept
# the entire graph.
k_thr = int(np.percentile(core_vals, 97))

t0 = tic()
# Node ids are taken to be 0..numberOfNodes()-1, as in the original selection.
backbone_mask = core_vals[:G.numberOfNodes()] >= k_thr
nodes_backbone = np.flatnonzero(backbone_mask).tolist()
G_backbone = nk.graphtools.subgraphFromNodes(G, nodes_backbone)
toc(t0, f"Backbone subgraph (k>={k_thr})")
report(G_backbone, "Backbone")

force_cleanup()

# --- PageRank ---
t0 = tic()
pr = nk.centrality.PageRank(G, damp=0.85, tol=1e-8).run()
toc(t0, "PageRank")

pr_scores = np.array(pr.scores(), dtype=np.float64)
pr_order = np.argsort(-pr_scores)  # node ids by descending score
top_pr = pr_order[:15]
print("Top PageRank nodes:", top_pr.tolist())
print("Top PageRank scores:", pr_scores[top_pr].tolist())

# --- Approximate betweenness ---
t0 = tic()
abw = nk.centrality.ApproxBetweenness(G, epsilon=AB_EPS).run()
toc(t0, "ApproxBetweenness")

abw_scores = np.array(abw.scores(), dtype=np.float64)
abw_order = np.argsort(-abw_scores)  # node ids by descending score
top_abw = abw_order[:15]
print("Top ApproxBetweenness nodes:", top_abw.tolist())
print("Top ApproxBetweenness scores:", abw_scores[top_abw].tolist())

force_cleanup()

In [None]:
# --- Community detection with PLM ---
t0 = tic()
plm = nk.community.PLM(G, refine=True, gamma=1.0, par="balanced")
plm.run()
toc(t0, "PLM community detection")

part = plm.getPartition()
num_comms = part.numberOfSubsets()
print("communities:", num_comms)

t0 = tic()
Q = nk.community.Modularity().getQuality(part, G)
toc(t0, "Modularity")
print("modularity Q:", Q)

sizes = np.array(list(part.subsetSizeMap().values()), dtype=np.int64)
print("community size stats:", pd.Series(sizes).describe(percentiles=[0.5, 0.9, 0.99]).to_dict())

# --- Effective diameter ---
# NOTE(review): EffectiveDiameter looks like the exact variant and may be slow
# on larger presets; nk.distance.EffectiveDiameterApproximation may be the
# scalable alternative — confirm before switching, since results would differ.
t0 = tic()
eff = nk.distance.EffectiveDiameter(G, ED_RATIO)
eff.run()
toc(t0, f"EffectiveDiameter (ratio={ED_RATIO})")
print("effective diameter:", eff.getEffectiveDiameter())

# --- Estimated diameter ---
# The diameter estimate comes from nk.distance.Diameter with an estimation
# algorithm; getDiameter() returns a (lower bound, upper bound) pair rather
# than an object with a `.distance` attribute.
t0 = tic()
diam = nk.distance.Diameter(G, algo=nk.distance.DiameterAlgo.ESTIMATED_RANGE, error=0.1)
diam.run()
toc(t0, "EstimatedDiameter")
diam_lower, diam_upper = diam.getDiameter()
print("estimated diameter (lower, upper bound):", (diam_lower, diam_upper))

force_cleanup()

In [None]:
# --- Sparsification ---
t0 = tic()
# The sparsifier scores edges by edge id, so the graph must have indexed
# edges before it is passed in.
G.indexEdges()
# LocalSimilaritySparsifier takes no constructor arguments; the graph and the
# parameter (0.7) are passed to getSparsifiedGraph.
sp = nk.sparsification.LocalSimilaritySparsifier()
G_sparse = sp.getSparsifiedGraph(G, 0.7)
toc(t0, "LocalSimilarity sparsification (alpha=0.7)")
report(G_sparse, "Sparse")

# --- Repeat the main analyses on the sparsified graph ---
t0 = tic()
pr2 = nk.centrality.PageRank(G_sparse, damp=0.85, tol=1e-8)
pr2.run()
toc(t0, "PageRank on sparse")
pr2_scores = np.array(pr2.scores(), dtype=np.float64)
print("Top PR nodes (sparse):", np.argsort(-pr2_scores)[:15].tolist())

t0 = tic()
plm2 = nk.community.PLM(G_sparse, refine=True, gamma=1.0, par="balanced")
plm2.run()
toc(t0, "PLM on sparse")
part2 = plm2.getPartition()
Q2 = nk.community.Modularity().getQuality(part2, G_sparse)
print("communities (sparse):", part2.numberOfSubsets(), "| modularity (sparse):", Q2)

t0 = tic()
eff2 = nk.distance.EffectiveDiameter(G_sparse, ED_RATIO)
eff2.run()
toc(t0, "EffectiveDiameter on sparse")
# `eff` is the effective-diameter result computed on the original graph in the
# previous cell.
print("effective diameter (orig):", eff.getEffectiveDiameter(), "| (sparse):", eff2.getEffectiveDiameter())

force_cleanup()

# --- Export ---
# Write to /content when it exists (same path as before on Colab); otherwise
# fall back to the current working directory so the cell also runs elsewhere.
out_dir = "/content" if os.path.isdir("/content") else os.getcwd()
out_path = os.path.join(out_dir, "networkit_large_sparse.edgelist")
t0 = tic()
# Tab-separated edge list; the second argument (0) is the id of the first node.
nk.graphio.EdgeListWriter("\t", 0).write(G_sparse, out_path)
toc(t0, "Wrote edge list")
print("Saved:", out_path)

print("\nAdvanced large-graph pipeline complete.")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/11.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/11.0 MB[0m [31m154.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m10.9/11.0 MB[0m [31m192.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31m82.6 MB/s[0m eta [36m0:00:00[0m
[?25h

NetworKit: 11.2.1

Preset=LARGE | N=120,000 | m=6 | approx-betweenness epsilon=0.12
Generated BA graph: 0.150s | RAM~0.27 GB

[G] nodes=120,000 edges=719,970 directed=False weighted=False
ConnectedComponents: 0.029s | RAM~0.28 GB
components: 1
CoreDecomposition: 0.058s | RAM~0.28 GB
degeneracy (max core): 6
core stats: {'count': 120000.0, 'mean': 6.0, 'std': 0.0, 'min': 6.0, '50%': 6.0, '90%': 6.0, '99%': 6.0, 'max': 6.0}
Backbone subgraph (k>=6): 0.210s | RAM~0.30 GB

[Backbone] nodes=120,000 edges=719,970 directed=False weighted=False
PageRank: 0.200s | RAM~0.30 GB
Top PageRank nodes: [2, 11, 8, 7, 6, 13, 1, 5, 3, 24, 18, 4, 30, 17, 15]
Top PageRank scores: [0.0008362898010519966, 0.0007013825401556931, 0.0006970178146063455, 0.000682497330603236, 0.0006399232339799123, 0.0006316198514725908, 0.000524644205957833, 0.0004746303300425698, 0.00046648014912187725, 0.0004659369439270262, 0.0004087781585720984, 0.00039861654777592766, 0.00039134959868324836, 0.0003759248844420208, 0.000370