## Imports & Config

In [19]:
import random
import math
import numpy as np
import pandas as pd
import networkx as nx
import heapq

from tqdm import tqdm
from networkx.algorithms.community import louvain_communities, asyn_lpa_communities
from sklearn.metrics import roc_auc_score, average_precision_score
from xgboost import XGBClassifier

SEED = 42
random.seed(SEED)
np.random.seed(SEED)

DATA_PATH = r"ca-HepPh.txt"

# Core filtering
MIN_DEGREE_CORE = 1

# Split edges
TEST_FRAC = 0.2

# Negative sampling ratio
NEG_POS_RATIO_TRAIN = 1
NEG_POS_RATIO_TEST  = 1

# Giới hạn sample để chạy nhanh (None = full)
MAX_TRAIN_POS = None
MAX_TEST_POS  = None

# Rec-eval
K_LIST = [5, 10, 20]
USERS_EVAL = 2000

# Centrality
TOP_N_SUBGRAPH = 20000  
TOP_K_SHOW = 20            
BETW_K = 200               
CLOSE_NODES = 2000         

## Load edges + Build G_full

In [2]:
def load_edges_txt(path):
    df = pd.read_csv(
        path, sep="\t", comment="#", header=None,
        names=["u","v"], dtype=np.int64
    )
    return list(zip(df["u"].tolist(), df["v"].tolist()))

edges = load_edges_txt(DATA_PATH)
print("Loaded edge rows:", len(edges))

G_full = nx.Graph()
G_full.add_edges_from(edges)
print("G_full |V| =", G_full.number_of_nodes(), "|E| =", G_full.number_of_edges())

Loaded edge rows: 237010
G_full |V| = 12008 |E| = 118521


In [3]:
G_show = None
for name in ["G_train", "G", "G_full"]:
    if name in globals() and isinstance(globals()[name], nx.Graph):
        G_show = globals()[name]
        break

if G_show is None:
    raise NameError("Không tìm thấy graph (G_train/G/G_full). Hãy tạo graph trước.")

node_max, deg_max = max(G_show.degree(), key=lambda x: x[1])

print("Node có nhiều mối quan hệ nhất:", node_max)
print("Số mối quan hệ (degree):", deg_max)

Node có nhiều mối quan hệ nhất: 71788
Số mối quan hệ (degree): 491


In [4]:
# ===== 1) Chọn graph =====
G0 = None
for name in ["G_train", "G", "G_full"]:
    if name in globals() and isinstance(globals()[name], nx.Graph):
        G0 = globals()[name]
        print(f"Using graph: {name}")
        break
if G0 is None:
    raise NameError("Không tìm thấy graph (G_train/G/G_full). Hãy tạo graph trước.")

print("|V| =", G0.number_of_nodes(), "|E| =", G0.number_of_edges())

# ===== 2) Giảm kích thước để tính centrality (đồ thị lớn sẽ rất nặng) =====
TOP_N_SUBGRAPH = 20000   # tăng/giảm tuỳ RAM (20k chạy ổn hơn nhiều so với full)
TOP_K_SHOW = 20          # show top 20 node cho mỗi độ đo

if G0.number_of_nodes() > TOP_N_SUBGRAPH:
    top_nodes = [n for n, d in heapq.nlargest(TOP_N_SUBGRAPH, G0.degree, key=lambda x: x[1])]
    Gc = G0.subgraph(top_nodes).copy()
    print(f"Subgraph induced by top-{TOP_N_SUBGRAPH} degree nodes: |V|={Gc.number_of_nodes()} |E|={Gc.number_of_edges()}")
else:
    Gc = G0.copy()
    print("Using FULL graph for centrality (graph nhỏ).")

# (tuỳ chọn) lấy LCC để closeness/harmonic ổn định hơn
if Gc.number_of_nodes() > 0:
    lcc_nodes = max(nx.connected_components(Gc), key=len)
    Gc = Gc.subgraph(lcc_nodes).copy()
    print(f"LCC subgraph: |V|={Gc.number_of_nodes()} |E|={Gc.number_of_edges()}")

# ===== helper: lấy top-k từ dict =====
def topk_from_dict(d, k=20):
    return sorted(d.items(), key=lambda x: x[1], reverse=True)[:k]

# ===== 3) Degree Centrality =====
degree_centrality = nx.degree_centrality(Gc)
print("\n=== Degree Centrality (Top) ===")
print(topk_from_dict(degree_centrality, TOP_K_SHOW))

# ===== 4) Betweenness Centrality (approx bằng sampling k nguồn) =====
# Nếu Gc vẫn lớn, dùng k=200/500 cho nhanh; nếu nhỏ thì có thể bỏ k để tính full.
BETW_K = 200 if Gc.number_of_nodes() > 5000 else None
if BETW_K is None:
    betweenness_centrality = nx.betweenness_centrality(Gc, normalized=True)
else:
    betweenness_centrality = nx.betweenness_centrality(Gc, k=BETW_K, seed=SEED, normalized=True)

print("\n=== Betweenness Centrality (Top) ===")
print(topk_from_dict(betweenness_centrality, TOP_K_SHOW))

# ===== 5) Closeness Centrality (tính trên subset node để nhẹ) =====
CLOSE_NODES = 2000 if Gc.number_of_nodes() > 5000 else Gc.number_of_nodes()
close_nodes = [n for n, d in heapq.nlargest(CLOSE_NODES, Gc.degree, key=lambda x: x[1])]

closeness_centrality = {u: nx.closeness_centrality(Gc, u=u) for u in close_nodes}
print(f"\n=== Closeness Centrality (computed on {len(close_nodes)} nodes, Top) ===")
print(topk_from_dict(closeness_centrality, TOP_K_SHOW))

# ===== 6) Harmonic Centrality (cũng trên subset để nhẹ) =====
harmonic_centrality = nx.harmonic_centrality(Gc, nbunch=close_nodes)
print(f"\n=== Harmonic Centrality (computed on {len(close_nodes)} nodes, Top) ===")
print(topk_from_dict(harmonic_centrality, TOP_K_SHOW))

# ===== 7) Eigenvector Centrality (power iteration) =====
# có thể fail nếu không hội tụ -> tăng max_iter
try:
    eigenvector_centrality = nx.eigenvector_centrality(Gc, max_iter=2000, tol=1e-6)
    print("\n=== Eigenvector Centrality (Top) ===")
    print(topk_from_dict(eigenvector_centrality, TOP_K_SHOW))
except Exception as e:
    eigenvector_centrality = None
    print("\nEigenvector Centrality failed:", e)

# ===== 8) PageRank =====
pagerank = nx.pagerank(Gc, max_iter=200, tol=1e-6)
print("\n=== PageRank (Top) ===")
print(topk_from_dict(pagerank, TOP_K_SHOW))

Using graph: G_full
|V| = 12008 |E| = 118521
Using FULL graph for centrality (graph nhỏ).
LCC subgraph: |V|=11204 |E|=117649

=== Degree Centrality (Top) ===
[(71788, 0.04382754619298402), (8168, 0.04338123716861555), (33111, 0.04302418994912077), (17284, 0.03963224136392038), (40942, 0.03954297955904668), (31512, 0.0392751941444256), (75297, 0.0392751941444256), (36663, 0.03793626707132018), (66142, 0.03793626707132018), (30344, 0.037847005266446486), (84171, 0.03775774346157279), (3919, 0.03775774346157279), (52662, 0.0376684816566991), (88290, 0.03704364902258324), (35642, 0.03650807819334107), (67051, 0.035972507364098905), (73754, 0.03570472194947782), (30856, 0.03516915112023565), (38755, 0.03400874765687762), (50822, 0.033919485852003925)]

=== Betweenness Centrality (Top) ===
[(19732, 0.03366821567380617), (81600, 0.024539357019317316), (28271, 0.019553186735421838), (42179, 0.018273808361870605), (48627, 0.015621244650843632), (46339, 0.015567064476704268), (36228, 0.015518570

## Core filter graph

In [5]:
def core_filter_graph(G, min_degree=1):
    if min_degree <= 1:
        return G.copy()
    deg = dict(G.degree())
    core_nodes = [n for n, d in deg.items() if d >= min_degree]
    return G.subgraph(core_nodes).copy()

G = core_filter_graph(G_full, MIN_DEGREE_CORE)
print(f"After core filter (deg>={MIN_DEGREE_CORE}): |V|={G.number_of_nodes()} |E|={G.number_of_edges()}")

After core filter (deg>=1): |V|=12008 |E|=118521


## Split edges + Negative sampling + Build G_train

In [6]:
def make_undirected_edge(u, v):
    return (u, v) if u < v else (v, u)

def build_edge_set(G):
    return set(make_undirected_edge(u, v) for u, v in G.edges())

def split_edges(G, test_frac=0.2, seed=42, max_test_pos=None):
    rng = random.Random(seed)
    all_edges = list(G.edges())
    rng.shuffle(all_edges)

    n_test = int(len(all_edges) * test_frac)
    if max_test_pos is not None:
        n_test = min(n_test, max_test_pos)

    test_pos = all_edges[:n_test]
    train_pos = all_edges[n_test:]
    return train_pos, test_pos

def sample_negative_edges(nodes, forbidden_edge_set, n_samples, seed=42):
    rng = random.Random(seed)
    nodes_list = list(nodes)
    n = len(nodes_list)

    neg = set()
    while len(neg) < n_samples:
        u = nodes_list[rng.randrange(n)]
        v = nodes_list[rng.randrange(n)]
        if u == v:
            continue
        e = make_undirected_edge(u, v)
        if e in forbidden_edge_set:
            continue
        neg.add(e)
    return list(neg)

train_pos, test_pos = split_edges(G, test_frac=TEST_FRAC, seed=SEED, max_test_pos=MAX_TEST_POS)

# giới hạn train_pos để chạy nhanh
if MAX_TRAIN_POS is not None:
    train_pos = train_pos[:MAX_TRAIN_POS]

print("Pos train:", len(train_pos), "Pos test:", len(test_pos))

G_train = G.copy()
G_train.remove_edges_from(test_pos)
print("G_train |V| =", G_train.number_of_nodes(), "|E| =", G_train.number_of_edges())

# forbidden set: edges thật trên FULL graph
edge_set_full = build_edge_set(G)

n_train_neg = len(train_pos) * NEG_POS_RATIO_TRAIN
n_test_neg  = len(test_pos)  * NEG_POS_RATIO_TEST

train_neg = sample_negative_edges(G_train.nodes(), edge_set_full, n_train_neg, seed=SEED+1)
test_neg  = sample_negative_edges(G_train.nodes(), edge_set_full, n_test_neg,  seed=SEED+2)

print("Neg train:", len(train_neg), "Neg test:", len(test_neg))

Pos train: 94817 Pos test: 23704
G_train |V| = 12008 |E| = 94817
Neg train: 94817 Neg test: 23704


## Community detection

In [7]:
print("Running Louvain on G_train ...")
louvain_comms = louvain_communities(G_train, seed=SEED, resolution=1.0)
louvain_label = {}
for cid, comm in enumerate(louvain_comms):
    for node in comm:
        louvain_label[node] = cid
print("Num Louvain communities:", len(louvain_comms))

print("Running LPA on G_train ...")
lpa_comms = list(asyn_lpa_communities(G_train, seed=SEED))
lpa_label = {}
for cid, comm in enumerate(lpa_comms):
    for node in comm:
        lpa_label[node] = cid
print("Num LPA communities:", len(lpa_comms))

print("Label samples:")
print("  Louvain:", list(louvain_label.items())[:5])
print("  LPA    :", list(lpa_label.items())[:5])

Running Louvain on G_train ...
Num Louvain communities: 689
Running LPA on G_train ...
Num LPA communities: 1669
Label samples:
  Louvain: [(44242, 0), (33598, 1), (12411, 2), (74242, 3), (68612, 3)]
  LPA    : [(81925, 0), (40966, 0), (5, 0), (24586, 0), (17913, 0)]


## Pair features + feature matrix for XGBoost

In [8]:
def pair_features(u, v):
    Nu = adj.get(u, set())
    Nv = adj.get(v, set())

    cn_set = Nu & Nv
    cn = len(cn_set)

    union_sz = (len(Nu) + len(Nv) - cn)
    jaccard = cn / union_sz if union_sz > 0 else 0.0

    aa = 0.0
    ra = 0.0
    for w in cn_set:
        dw = deg.get(w, 0)
        if dw > 1:
            aa += 1.0 / math.log(dw)
        if dw > 0:
            ra += 1.0 / dw

    du = deg.get(u, 0)
    dv = deg.get(v, 0)
    pa = du * dv

    same_louvain = int(louvain_label.get(u, -1) == louvain_label.get(v, -2))
    same_lpa     = int(lpa_label.get(u, -1) == lpa_label.get(v, -2))

    return {
        "cn": float(cn),
        "jaccard": float(jaccard),
        "aa": float(aa),
        "ra": float(ra),
        "pa": float(pa),
        "deg_u": float(du),
        "deg_v": float(dv),
        "same_louvain": float(same_louvain),
        "same_lpa": float(same_lpa),
    }

FEATURE_COLS = ["cn","jaccard","aa","ra","pa","deg_u","deg_v","same_louvain","same_lpa"]

def build_feature_matrix(pairs):
    X = np.zeros((len(pairs), len(FEATURE_COLS)), dtype=np.float32)
    for i, (u, v) in enumerate(pairs):
        f = pair_features(u, v)
        X[i, :] = [f[c] for c in FEATURE_COLS]
    return X

## Build train/test for XGBoost

In [10]:
if "G_train" not in globals() or not isinstance(G_train, nx.Graph):
    raise NameError("Chưa có G_train. Hãy tạo G_train trước (split edges rồi remove test_pos).")

adj = {n: set(G_train.neighbors(n)) for n in G_train.nodes()}
deg = dict(G_train.degree())

print("OK: adj/deg ready.")
print("Example:", next(iter(adj.items())))

OK: adj/deg ready.
Example: (17010, {16961, 27801, 64003, 56358, 17897, 84491, 40848, 4049, 44114, 32403, 45648, 72504, 49335, 26904, 2489, 35262})


In [11]:
train_pairs = list(train_pos) + train_neg
train_y = np.array([1]*len(train_pos) + [0]*len(train_neg), dtype=np.int32)

test_pairs = list(test_pos) + test_neg
test_y = np.array([1]*len(test_pos) + [0]*len(test_neg), dtype=np.int32)

print("Building X_train ...")
X_train = build_feature_matrix(train_pairs)

print("Building X_test ...")
X_test = build_feature_matrix(test_pairs)

print("X_train shape:", X_train.shape, "X_test shape:", X_test.shape)

Building X_train ...
Building X_test ...
X_train shape: (189634, 9) X_test shape: (47408, 9)


## Train XGBoost

In [12]:
xgb_model = XGBClassifier(
    n_estimators=400,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    random_state=SEED,
    n_jobs=-1,
    eval_metric="logloss"
)

print("Training XGBoost...")
xgb_model.fit(X_train, train_y)
print("Done.")

proba_test = xgb_model.predict_proba(X_test)[:, 1]
auc = roc_auc_score(test_y, proba_test)
ap  = average_precision_score(test_y, proba_test)

print("Edge-level metrics on TEST")
print("AUC-ROC:", auc)
print("AP     :", ap)

Training XGBoost...
Done.
Edge-level metrics on TEST
AUC-ROC: 0.9725970731780382
AP     : 0.9833136206999717


## Candidate 2-hop

In [13]:
def get_2hop_candidates(u):
    Nu = adj.get(u, set())
    cands = set()
    for w in Nu:
        cands.update(adj.get(w, set()))
    cands.discard(u)
    cands.difference_update(Nu)  # bỏ người đã từng hợp tác
    return list(cands)

def explain_u_v(u, v, top_m=5):
    Nu = adj.get(u, set())
    Nv = adj.get(v, set())
    cn = list(Nu & Nv)

    # ưu tiên bạn chung ít phổ biến: sort theo 1/deg(w) giảm dần
    cn_sorted = sorted(cn, key=lambda w: (1.0 / max(deg.get(w, 1), 1)), reverse=True)
    cn_sorted = cn_sorted[:top_m]

    return {
        "common_neighbors": cn_sorted
    }

## 4 METHODS (2-hop, community, hybrid, XGBoost) + recommend_topk

In [14]:
def _norm01(x):
    x = np.asarray(x, dtype=np.float32)
    mn = float(x.min()) if len(x) else 0.0
    mx = float(x.max()) if len(x) else 0.0
    return (x - mn) / (mx - mn + 1e-9)

# weights cho Hybrid
HYB_W_RA   = 1.0
HYB_W_COMM = 0.3   # community_norm nằm trong [0,1] (vì chia 2)

def recommend_topk(u, method="2hop", top_k=10):
    cands = get_2hop_candidates(u)
    if len(cands) == 0:
        return pd.DataFrame(columns=["u","v","score","cn","comm","same_louvain","same_lpa","common_neighbors"])

    # tính feature nhẹ cho tất cả candidates (cn, ra, comm flags)
    cn_list, ra_list, sl_list, sp_list = [], [], [], []
    for v in cands:
        f = pair_features(u, v)
        cn_list.append(f["cn"])
        ra_list.append(f["ra"])
        sl_list.append(int(f["same_louvain"]))
        sp_list.append(int(f["same_lpa"]))

    cn_arr = np.asarray(cn_list, dtype=np.float32)
    ra_arr = np.asarray(ra_list, dtype=np.float32)
    sl_arr = np.asarray(sl_list, dtype=np.float32)
    sp_arr = np.asarray(sp_list, dtype=np.float32)
    comm_arr = sl_arr + sp_arr                      # 0..2
    comm_norm = comm_arr / 2.0                      # 0..1

    if method == "2hop":
        scores = cn_arr

    elif method == "community":
        # ưu tiên community, tie-break nhẹ bằng CN
        scores = comm_arr + 1e-3 * cn_arr

    elif method == "hybrid":
        # normalize RA theo user để scale ổn định
        ra_norm = _norm01(ra_arr)
        scores = HYB_W_RA * ra_norm + HYB_W_COMM * comm_norm

    elif method == "xgb":
        pairs = [(u, v) for v in cands]
        X = build_feature_matrix(pairs)
        scores = xgb_model.predict_proba(X)[:, 1].astype(np.float32)

    else:
        raise ValueError("method must be one of: '2hop', 'community', 'hybrid', 'xgb'")

    order = np.argsort(-scores)[:top_k]

    rows = []
    for idx in order:
        v = cands[idx]
        ex = explain_u_v(u, v, top_m=5)
        rows.append({
            "u": u,
            "v": v,
            "score": float(scores[idx]),
            "cn": float(cn_arr[idx]),
            "comm": int(comm_arr[idx]),
            "same_louvain": int(sl_arr[idx]),
            "same_lpa": int(sp_arr[idx]),
            "common_neighbors": ex["common_neighbors"]
        })

    return pd.DataFrame(rows).reset_index(drop=True)

## P@K, R@K, Hit@K, NDCG@K

In [15]:
def build_test_ground_truth(test_pos_edges):
    gt = {}
    for u, v in test_pos_edges:
        gt.setdefault(u, set()).add(v)
        gt.setdefault(v, set()).add(u)
    return gt

GT = build_test_ground_truth(test_pos)

def dcg_at_k(rel, k):
    rel = rel[:k]
    return sum((r / math.log2(i+2)) for i, r in enumerate(rel))

def ndcg_at_k(rel, k):
    dcg = dcg_at_k(rel, k)
    ideal = sorted(rel, reverse=True)
    idcg = dcg_at_k(ideal, k)
    return (dcg / idcg) if idcg > 0 else 0.0

METHODS = ["2hop", "community", "hybrid", "xgb"]

# Users eval: có ground-truth và có 2-hop candidates
users = [u for u in GT.keys() if len(get_2hop_candidates(u)) > 0]
random.Random(SEED).shuffle(users)
users = users[:min(USERS_EVAL, len(users))]
print("Users for rec-eval:", len(users))

K_MAX = max(K_LIST)

# BỎ MAP
metrics = {
    m: {k: {"P": 0.0, "R": 0.0, "Hit": 0.0, "NDCG": 0.0} for k in K_LIST}
    for m in METHODS
}

for u in tqdm(users, desc="Rec-eval"):
    gt_u = GT.get(u, set())
    if not gt_u:
        continue

    # lấy top-K_MAX cho mỗi method
    rec_lists = {}
    for m in METHODS:
        df = recommend_topk(u, method=m, top_k=K_MAX)
        rec_lists[m] = df["v"].tolist()

    for m in METHODS:
        pred = rec_lists[m]
        for k in K_LIST:
            topk = pred[:k]
            hits = [1 if v in gt_u else 0 for v in topk]

            P = sum(hits) / k
            R = sum(hits) / len(gt_u) if len(gt_u) else 0.0
            Hit = 1.0 if sum(hits) > 0 else 0.0
            NDCG = ndcg_at_k(hits, k)

            metrics[m][k]["P"] += P
            metrics[m][k]["R"] += R
            metrics[m][k]["Hit"] += Hit
            metrics[m][k]["NDCG"] += NDCG

# average
n_users = len(users)
rows = []
for m in METHODS:
    for k in K_LIST:
        rows.append({
            "method": m,
            "k": k,
            "P@k": metrics[m][k]["P"] / n_users,
            "R@k": metrics[m][k]["R"] / n_users,
            "Hit@k": metrics[m][k]["Hit"] / n_users,
            "NDCG@k": metrics[m][k]["NDCG"] / n_users,
        })

df_metrics = pd.DataFrame(rows).sort_values(["k", "method"]).reset_index(drop=True)
df_metrics


Users for rec-eval: 2000


Rec-eval: 100%|██████████| 2000/2000 [00:26<00:00, 75.17it/s] 


Unnamed: 0,method,k,P@k,R@k,Hit@k,NDCG@k
0,2hop,5,0.4188,0.535075,0.79,0.719696
1,community,5,0.3784,0.472603,0.7275,0.65908
2,hybrid,5,0.4349,0.573707,0.8225,0.752436
3,xgb,5,0.425,0.547683,0.7985,0.722698
4,2hop,10,0.31035,0.645685,0.836,0.730563
5,community,10,0.2787,0.588446,0.7945,0.67651
6,hybrid,10,0.32455,0.699439,0.8745,0.763714
7,xgb,10,0.32,0.677391,0.8615,0.740339
8,2hop,20,0.208525,0.733211,0.8755,0.734004
9,community,20,0.18355,0.672143,0.847,0.684491


## Top-10 cho 1 tác giả

In [16]:
deg_series = pd.Series(deg)
u_demo = int(deg_series.sort_values(ascending=False).index[0])
print("Demo author u =", u_demo, "degree =", int(deg_series.loc[u_demo]))

for m in ["2hop", "community", "hybrid", "xgb"]:
    print(f"\n=== Top-10: {m} ===")
    display(recommend_topk(u_demo, method=m, top_k=10))

Demo author u = 71788 degree = 393

=== Top-10: 2hop ===


Unnamed: 0,u,v,score,cn,comm,same_louvain,same_lpa,common_neighbors
0,71788,30344,221.0,221.0,2,1,1,"[82481, 60978, 62598, 67849, 131]"
1,71788,84171,210.0,210.0,2,1,1,"[60978, 62598, 67849, 131, 54405]"
2,71788,36334,181.0,181.0,2,1,1,"[65794, 9936, 37625, 80773, 5109]"
3,71788,35642,178.0,178.0,2,1,1,"[82481, 24956, 16038, 1403, 38297]"
4,71788,7770,168.0,168.0,2,1,1,"[29940, 55321, 21634, 71662, 39913]"
5,71788,42548,167.0,167.0,2,1,1,"[21634, 71662, 39913, 1851, 12728]"
6,71788,83152,166.0,166.0,2,1,1,"[21634, 71662, 12728, 68068, 1911]"
7,71788,7369,166.0,166.0,2,1,1,"[16667, 16038, 31053, 1403, 84491]"
8,71788,84581,164.0,164.0,2,1,1,"[1851, 12728, 1911, 5363, 17202]"
9,71788,6852,164.0,164.0,2,1,1,"[1851, 1911, 5363, 51208, 17202]"



=== Top-10: community ===


Unnamed: 0,u,v,score,cn,comm,same_louvain,same_lpa,common_neighbors
0,71788,30344,2.221,221.0,2,1,1,"[82481, 60978, 62598, 67849, 131]"
1,71788,84171,2.21,210.0,2,1,1,"[60978, 62598, 67849, 131, 54405]"
2,71788,36334,2.181,181.0,2,1,1,"[65794, 9936, 37625, 80773, 5109]"
3,71788,35642,2.178,178.0,2,1,1,"[82481, 24956, 16038, 1403, 38297]"
4,71788,7770,2.168,168.0,2,1,1,"[29940, 55321, 21634, 71662, 39913]"
5,71788,42548,2.167,167.0,2,1,1,"[21634, 71662, 39913, 1851, 12728]"
6,71788,7369,2.166,166.0,2,1,1,"[16667, 16038, 31053, 1403, 84491]"
7,71788,83152,2.166,166.0,2,1,1,"[21634, 71662, 12728, 68068, 1911]"
8,71788,88942,2.164,164.0,2,1,1,"[1851, 12728, 68068, 1911, 5363]"
9,71788,84581,2.164,164.0,2,1,1,"[1851, 12728, 1911, 5363, 17202]"



=== Top-10: hybrid ===


Unnamed: 0,u,v,score,cn,comm,same_louvain,same_lpa,common_neighbors
0,71788,30344,1.3,221.0,2,1,1,"[82481, 60978, 62598, 67849, 131]"
1,71788,84171,1.252677,210.0,2,1,1,"[60978, 62598, 67849, 131, 54405]"
2,71788,36334,0.979097,181.0,2,1,1,"[65794, 9936, 37625, 80773, 5109]"
3,71788,53056,0.89233,162.0,2,1,1,"[48869, 17440, 39150, 79194, 14662]"
4,71788,35642,0.85086,178.0,2,1,1,"[82481, 24956, 16038, 1403, 38297]"
5,71788,36648,0.820921,74.0,1,0,1,"[13323, 57117, 7860, 27142, 18418]"
6,71788,7770,0.815258,168.0,2,1,1,"[29940, 55321, 21634, 71662, 39913]"
7,71788,66001,0.812454,94.0,1,0,1,"[73756, 33073, 17337, 20474, 9935]"
8,71788,4725,0.801723,163.0,2,1,1,"[65696, 47920, 36228, 18110, 81600]"
9,71788,7369,0.799826,166.0,2,1,1,"[16667, 16038, 31053, 1403, 84491]"



=== Top-10: xgb ===


Unnamed: 0,u,v,score,cn,comm,same_louvain,same_lpa,common_neighbors
0,71788,41826,0.999991,157.0,2,1,1,"[12728, 1911, 5363, 17202, 75845]"
1,71788,22846,0.999991,163.0,2,1,1,"[1851, 12728, 1911, 5363, 51208]"
2,71788,84581,0.999991,164.0,2,1,1,"[1851, 12728, 1911, 5363, 17202]"
3,71788,70944,0.999991,152.0,2,1,1,"[1851, 12728, 68068, 1911, 5363]"
4,71788,1234,0.999991,150.0,2,1,1,"[12728, 68068, 1911, 5363, 51208]"
5,71788,70276,0.999991,159.0,2,1,1,"[12728, 68068, 1911, 5363, 75845]"
6,71788,5790,0.999991,146.0,2,1,1,"[12728, 68068, 1911, 5363, 17202]"
7,71788,71719,0.999991,151.0,2,1,1,"[1851, 12728, 1911, 51208, 17202]"
8,71788,36082,0.999991,154.0,2,1,1,"[68068, 1911, 5363, 17202, 75845]"
9,71788,88942,0.999991,164.0,2,1,1,"[1851, 12728, 68068, 1911, 5363]"
