# Giai đoạn 2: Network Anatomy & Node Roles (Phân tích Vai trò Nút)


In [1]:
import networkx as nx
import pandas as pd

# Build the graph from the cleaned edge-list file, parsing node labels as integers.
EDGE_LIST_PATH = "/content/facebook_combined_cleaned.txt"
G = nx.read_edgelist(EDGE_LIST_PATH, nodetype=int)

## Centrality Metrics

Degree Centrality (hubs)

In [3]:
# Degree centrality for every node of G, returned as a {node: score} mapping.
degree_centrality = nx.degree_centrality(G)

# Reshape the mapping into a two-column table (one row per node).
deg_df = pd.DataFrame(
    list(degree_centrality.items()),
    columns=["NodeID", "DegreeCentrality"],
)


Betweenness Centrality (bridges)

In [4]:
# Normalized betweenness centrality for every node of G ({node: score} mapping).
bet_centrality = nx.betweenness_centrality(G, normalized=True)

# One record per node: (NodeID, Betweenness).
bet_df = pd.DataFrame.from_records(
    list(bet_centrality.items()),
    columns=["NodeID", "Betweenness"],
)


PageRank

In [5]:
# PageRank score for every node of G, using the library's default parameters.
pagerank = nx.pagerank(G)

# Turn the {node: score} mapping into (NodeID, PageRank) rows.
pagerank_rows = [(node_id, score) for node_id, score in pagerank.items()]
pr_df = pd.DataFrame(pagerank_rows, columns=["NodeID", "PageRank"])


## Role Table

In [6]:
# Join the three per-node metric tables on NodeID into a single frame.
metric_frames = deg_df.merge(bet_df, on="NodeID").merge(pr_df, on="NodeID")

# Order rows from highest to lowest degree centrality and renumber them 0..n-1.
role_table = (
    metric_frames
    .sort_values(by="DegreeCentrality", ascending=False)
    .reset_index(drop=True)
)

print("=== ROLE TABLE — 3 CENTRALITY METRICS ===")
# The bare last expression is rendered as the cell's output.
role_table.head()


=== ROLE TABLE — 3 CENTRALITY METRICS ===


Unnamed: 0,NodeID,DegreeCentrality,Betweenness,PageRank
0,107,0.258791,0.480518,0.006936
1,1684,0.196137,0.337797,0.006367
2,1912,0.186974,0.229295,0.003877
3,3437,0.135463,0.236115,0.007615
4,0,0.085934,0.146306,0.00629


In [7]:
# Write the role table to a CSV file; the row index is not written.
ROLE_FILE = "role_table.csv"
role_table.to_csv(path_or_buf=ROLE_FILE, index=False)


Top 10 mỗi chỉ số

In [8]:
# Rank the role table separately by each centrality metric (descending)
# and keep the first 10 rows of each ranking.
top10_degree = role_table.sort_values("DegreeCentrality", ascending=False).head(10)
top10_bet = role_table.sort_values("Betweenness", ascending=False).head(10)
top10_pr = role_table.sort_values("PageRank", ascending=False).head(10)

# Show every ranking under its own banner. One loop replaces three copy-pasted
# print/display pairs; the "BETWEENNESS" banner was previously misspelled.
# `display` is the rich-output helper available in the notebook environment.
for banner, ranking in (
    ("=== TOP 10 — DEGREE CENTRALITY (Hubs) ===", top10_degree),
    ("=== TOP 10 — BETWEENNESS CENTRALITY (Bridges) ===", top10_bet),
    ("=== TOP 10 — PAGERANK (Authority) ===", top10_pr),
):
    print(banner)
    display(ranking)


=== TOP 10 — DEGREE CENTRALITY (Hubs) ===


Unnamed: 0,NodeID,DegreeCentrality,Betweenness,PageRank
0,107,0.258791,0.480518,0.006936
1,1684,0.196137,0.337797,0.006367
2,1912,0.186974,0.229295,0.003877
3,3437,0.135463,0.236115,0.007615
4,0,0.085934,0.146306,0.00629
5,2543,0.072808,0.007605,0.000673
6,2347,0.072065,0.0032,0.000629
7,1888,0.062902,0.000118,0.000712
8,1800,0.060674,0.000153,0.000698
9,1663,0.058197,0.0011,0.000665


=== TOP 10 — BETWENNESS CENTRALITY (Bridges) ===


Unnamed: 0,NodeID,DegreeCentrality,Betweenness,PageRank
0,107,0.258791,0.480518,0.006936
1,1684,0.196137,0.337797,0.006367
3,3437,0.135463,0.236115,0.007615
2,1912,0.186974,0.229295,0.003877
855,1085,0.016345,0.149015,0.000598
4,0,0.085934,0.146306,0.00629
823,698,0.01684,0.11533,0.001317
904,567,0.015602,0.09631,0.000482
2970,58,0.002972,0.08436,0.000214
383,428,0.028479,0.064309,0.00078


=== TOP 10 — PAGERANK (Authority) ===


Unnamed: 0,NodeID,DegreeCentrality,Betweenness,PageRank
3,3437,0.135463,0.236115,0.007615
0,107,0.258791,0.480518,0.006936
1,1684,0.196137,0.337797,0.006367
4,0,0.085934,0.146306,0.00629
2,1912,0.186974,0.229295,0.003877
13,348,0.056711,0.037998,0.002348
139,686,0.0421,0.029722,0.002219
984,3980,0.014611,0.02482,0.00217
180,414,0.039376,0.047633,0.0018
823,698,0.01684,0.11533,0.001317


Mức độ trùng nhau giữa Top 10

In [9]:
# Node IDs that appear in each of the two top-10 rankings.
set_deg = set(top10_degree["NodeID"])
set_bet = set(top10_bet["NodeID"])

# Nodes that are in both the degree ranking and the betweenness ranking.
overlap = set_deg.intersection(set_bet)

print("=== Overlap giữa Top 10 Degree và Top 10 Betweenness ===")
print(f"Số nút trùng nhau: {len(overlap)}")
# An empty set is falsy, so the fallback text is printed when nothing overlaps.
print("Các nút trùng:", overlap or "Không có")


=== Overlap giữa Top 10 Degree và Top 10 Betweenness ===
Số nút trùng nhau: 5
Các nút trùng: {0, 107, 3437, 1684, 1912}
