In [1]:
import pandas as pd
import networkx as nx
import numpy as np

# Read the edge list from 'hero-network.csv' and build an undirected graph
# whose endpoints come from the 'hero1' and 'hero2' columns.
edges = pd.read_csv('hero-network.csv')
G = nx.from_pandas_edgelist(edges, 'hero1', 'hero2', create_using=nx.Graph())

# Basic size of the graph.
num_nodes, num_edges = G.number_of_nodes(), G.number_of_edges()

# Degree centrality per node, plus the same values as a flat array so the
# summary statistics can be computed with numpy.
deg_cent = nx.degree_centrality(G)
cent_values = np.asarray(list(deg_cent.values()))

avg_cent = np.mean(cent_values)
std_cent = np.std(cent_values)  # numpy's default ddof=0, i.e. population std
min_cent = np.min(cent_values)
max_cent = np.max(cent_values)

# Extremes: on ties, max()/min() keep the first node in the dict's iteration order.
node_max = max(deg_cent, key=deg_cent.__getitem__)
node_min = min(deg_cent, key=deg_cent.__getitem__)

# Five nodes whose centrality is closest to the mean (stable sort, so ties
# keep the dict's iteration order).
nodes_by_gap_to_avg = sorted(deg_cent, key=lambda name: abs(deg_cent[name] - avg_cent))
nodes_near_avg = nodes_by_gap_to_avg[:5]

# Report: one line per statistic, a blank separator line, then the notable nodes.
print(
    f"Total nodes:               {num_nodes}",
    f"Total edges:               {num_edges}",
    f"Avg degree centrality:     {avg_cent:.6f}",
    f"Standard dev centrality:   {std_cent:.6f}",
    f"Min centrality:            {min_cent:.6f}",
    f"Max centrality:            {max_cent:.6f}",
    "",
    f"Node with max centrality:  {node_max} ({deg_cent[node_max]:.6f})",
    f"Node with min centrality:  {node_min} ({deg_cent[node_min]:.6f})",
    "Top 5 nodes closest to avg centrality:",
    sep="\n",
)
for n in nodes_near_avg:
    print(f"  {n} ({deg_cent[n]:.6f})")


Total nodes:               6426
Total edges:               167219
Avg degree centrality:     0.008100
Standard dev centrality:   0.017689
Min centrality:            0.000156
Max centrality:            0.296965

Node with max centrality:  CAPTAIN AMERICA (0.296965)
Node with min centrality:  AZRAEL (0.000156)
Top 5 nodes closest to avg centrality:
  JONES, TAMMY ANNE (0.008093)
  JONES, LORRAINE LORR (0.008093)
  JONES, DANIEL DANNY (0.008093)
  NEVILLE, KATE (0.008093)
  PEARSON, MARCY (0.008093)


In [2]:
from itertools import combinations

# Bucket the nodes of G by degree: degree -> list of nodes having that degree.
deg_dict = dict(G.degree())

degree_to_nodes = {}
for node, deg in deg_dict.items():
    if deg not in degree_to_nodes:
        degree_to_nodes[deg] = []
    degree_to_nodes[deg].append(node)


def _distance_or_none(u, v):
    """Return the shortest-path length between u and v in G, or None when no path exists."""
    try:
        return nx.shortest_path_length(G, source=u, target=v)
    except nx.NetworkXNoPath:
        return None


# For each degree shared by at least two nodes, gather the shortest-path
# length of every unordered pair of nodes with that degree and summarise it.
results = []
for k in sorted(degree_to_nodes):
    nodes = degree_to_nodes[k]
    if len(nodes) < 2:
        # a single node has no pair to measure
        continue

    dists = []
    for u, v in combinations(nodes, 2):
        d = _distance_or_none(u, v)
        # pairs with no connecting path are left out of the statistics
        if d is not None:
            dists.append(d)

    if dists:
        avg_dist = np.mean(dists)
        var_dist = np.var(dists)  # numpy's default ddof=0, i.e. population variance
    else:
        # every pair was disconnected: nothing to average
        avg_dist = var_dist = np.nan

    results.append(dict(
        degree=k,
        n_nodes=len(nodes),
        avg_distance=avg_dist,
        var_distance=var_dist,
    ))

# One row per degree, indexed by the degree value.
df_summary = pd.DataFrame(results).set_index('degree')
print(df_summary)

        n_nodes  avg_distance  var_distance
degree                                     
1            53      3.270376      0.264671
2            86      3.260739      0.260606
3           104      3.226475      0.264056
4           150      3.104609      0.222704
5           182      3.139336      0.249968
...         ...           ...           ...
497           2      1.000000      0.000000
526           2      1.000000      0.000000
539           2      1.000000      0.000000
585           2      1.000000      0.000000
922           2      1.000000      0.000000

[242 rows x 3 columns]


In [3]:
# Cosine similarity of neighbourhoods between nodes that share the same degree.
# For each degree k held by at least two nodes, every unordered pair (u, v) is
# scored as |N(u) & N(v)| / sqrt(|N(u)| * |N(v)|), and the mean and population
# standard deviation of those scores are recorded.
results = []
for k, nodes in sorted(degree_to_nodes.items()):
    if len(nodes) < 2:
        continue  # a single node has no pair to compare

    sims = []
    # Degree-0 nodes have empty neighbourhoods, so the similarity is undefined;
    # `sims` stays empty and the row is reported as NaN below.
    if k > 0:
        # Build each neighbour set once per node rather than once per pair.
        nbr_sets = {node: set(G.neighbors(node)) for node in nodes}
        for u, v in combinations(nodes, 2):
            nbrs_u = nbr_sets[u]
            nbrs_v = nbr_sets[v]
            # Normalise by the actual neighbourhood sizes. Without self-loops
            # both sizes equal k, so this matches dividing by sqrt(k * k);
            # with a self-loop networkx counts the loop twice in the degree
            # but the node appears only once among its neighbours, so k
            # would overstate the norm.
            sim = len(nbrs_u & nbrs_v) / np.sqrt(len(nbrs_u) * len(nbrs_v))
            sims.append(sim)

    if sims:
        avg_sim = np.mean(sims)
        std_sim = np.std(sims, ddof=0)   # population standard deviation
    else:
        avg_sim, std_sim = np.nan, np.nan

    results.append({
        'degree':    k,
        'n_nodes':   len(nodes),
        'avg_sim':   avg_sim,
        'std_sim':   std_sim
    })

# Summarise as a DataFrame with one row per degree, indexed by the degree value.
df_struct_eq = pd.DataFrame(results).set_index('degree')
print(df_struct_eq)

        n_nodes   avg_sim   std_sim
degree                             
1            53  0.023948  0.152886
2            86  0.014227  0.087931
3           104  0.011265  0.073284
4           150  0.017651  0.085114
5           182  0.016356  0.087057
...         ...       ...       ...
497           2  0.382294  0.000000
526           2  0.359316  0.000000
539           2  0.278293  0.000000
585           2  0.213675  0.000000
922           2  0.427332  0.000000

[242 rows x 3 columns]
