In [1]:
import pandas as pd
import networkx as nx
import numpy as np
from itertools import combinations

# 1/ Build the hero co-appearance network.
# Only the 'comic' and 'hero' columns of edges.csv are used; presumably the
# file holds one row per (hero, comic) appearance -- TODO confirm.
df = pd.read_csv('edges.csv')

G = nx.Graph()

# Link every pair of heroes that share a comic.  The edge weight counts how
# many comics the pair shares.  Nodes are only ever created through
# add_edge, so a hero who never shares a comic with anyone is not in G.
for _comic, group in df.groupby('comic'):
    # De-duplicate within the comic (first-appearance order is kept).
    # combinations() distinguishes elements by position, so a repeated
    # (hero, comic) row would otherwise yield a (hero, hero) self-loop and
    # count the same co-appearance more than once.
    heroes = group['hero'].drop_duplicates().tolist()
    for u, v in combinations(heroes, 2):
        if G.has_edge(u, v):
            G[u][v]['weight'] += 1
        else:
            G.add_edge(u, v, weight=1)

# Size of the network and summary statistics of its degree sequence.
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
degree_sequence = [d for _, d in G.degree()]
avg_degree = sum(degree_sequence) / num_nodes
# NOTE: Series.std() defaults to the sample standard deviation (ddof=1),
# whereas the later cells pass ddof=0 to np.var / np.std explicitly.
std_degree = pd.Series(degree_sequence).std()
min_degree = min(degree_sequence)
max_degree = max(degree_sequence)

# Degree centrality per node, held as a Series indexed by node label.
degree_centrality = nx.degree_centrality(G)
dc_series = pd.Series(degree_centrality)

# (node, centrality) tuples for the extreme nodes.
max_dc = (dc_series.idxmax(), dc_series.max())
min_dc = (dc_series.idxmin(), dc_series.min())

# The three nodes whose centrality is closest to the mean centrality.
avg_diff = (dc_series - dc_series.mean()).abs()
# Series.items() is a one-shot lazy iterator; materialise it so the result
# can be iterated more than once (e.g. when the report is printed again).
close_to_avg = list(dc_series[avg_diff.nsmallest(3).index].items())

# Report: global degree statistics first, then the centrality highlights.
print(
    "Network Statistics:",
    f"Total Nodes: {num_nodes}",
    f"Total Edges: {num_edges}",
    f"Average Degree: {avg_degree:.3f}",
    f"Standard Deviation: {std_degree:.3f}",
    f"Minimum Degree: {min_degree}",
    f"Maximum Degree: {max_degree}\n",
    sep="\n",
)

# max_dc / min_dc are (node, centrality) pairs.
most_name, most_score = max_dc
least_name, least_score = min_dc
print(f"Most Connected: {most_name} ({most_score:.3f})")
print(f"Least Connected: {least_name} ({least_score:.3f})")

print("\nClose to Average Centrality:")
for hero_name, centrality in close_to_avg:
    print(f"{hero_name}: {centrality:.3f}")

Network Statistics:
Total Nodes: 6421
Total Edges: 171644
Average Degree: 53.463
Standard Deviation: 116.755
Minimum Degree: 1
Maximum Degree: 1919

Most Connected: CAPTAIN AMERICA (0.299)
Least Connected: MASTER OF VENGEANCE (0.000)

Close to Average Centrality:
CAPTAIN HAWK: 0.008
GROTESK: 0.008
ISHIMA, DAVID: 0.008


In [2]:
# 2/ Shortest-path distance between nodes that have the same degree.
deg_dict = dict(G.degree())

# Bucket the nodes by degree: {degree: [node, ...]}.
degree_to_nodes = {}
for node, deg in deg_dict.items():
    degree_to_nodes.setdefault(deg, []).append(node)

results = []
for k, nodes in sorted(degree_to_nodes.items()):
    # A distance needs two endpoints; skip degrees held by a single node.
    if len(nodes) < 2:
        continue

    dists = []
    # Run one breadth-first search per source node and look up every later
    # node of the group in its result, rather than one search per pair.
    # Pairs are visited in the same order as combinations(nodes, 2).
    for i, u in enumerate(nodes[:-1]):
        hops_from_u = nx.single_source_shortest_path_length(G, u)
        for v in nodes[i + 1:]:
            d = hops_from_u.get(v)
            # Nodes unreachable from u are absent from the mapping; such
            # pairs are left out of the statistics.
            if d is not None:
                dists.append(d)

    if len(dists) > 0:
        avg_dist = np.mean(dists)
        var_dist = np.var(dists, ddof=0)  # population variance
    else:
        # No connected pair in this degree group.
        avg_dist = np.nan
        var_dist = np.nan

    results.append({
        'degree': k,
        'n_nodes': len(nodes),
        'avg_distance': avg_dist,
        'var_distance': var_dist
    })

# Explicit columns keep set_index('degree') valid even when no degree group
# has two or more nodes and results is empty.
df_summary = pd.DataFrame(
    results,
    columns=['degree', 'n_nodes', 'avg_distance', 'var_distance'],
).set_index('degree')
print(df_summary)

        n_nodes  avg_distance  var_distance
degree                                     
1            52      3.243067      0.250870
2            85      3.260224      0.259734
3           102      3.228888      0.268907
4           147      3.108937      0.226601
5           179      3.138158      0.250512
...         ...           ...           ...
492           2      1.000000      0.000000
550           3      1.000000      0.000000
743           2      1.000000      0.000000
1088          2      1.000000      0.000000
1133          2      1.000000      0.000000

[257 rows x 3 columns]


In [3]:
# 3/ Neighbourhood overlap (cosine-style similarity) between nodes that
# have the same degree.  Uses degree_to_nodes from the previous cell.
results = []
for k, nodes in sorted(degree_to_nodes.items()):
    # A similarity needs two nodes; skip degrees held by a single node.
    if len(nodes) < 2:
        continue

    if k > 0:
        # Build each node's neighbour set once per group instead of once
        # per pair.
        neighbor_sets = [set(G.neighbors(node)) for node in nodes]
        # sqrt(k_u * k_v) with k_u == k_v == k inside a degree group.
        # NOTE(review): using k as the norm assumes every node has exactly
        # k distinct neighbours (i.e. no self-loops in G) -- confirm.
        norm = np.sqrt(k * k)
        sims = [len(a & b) / norm for a, b in combinations(neighbor_sets, 2)]
    else:
        # Degree-0 nodes have no neighbourhood to compare.
        sims = []

    if sims:
        avg_sim = np.mean(sims)
        std_sim = np.std(sims, ddof=0)  # population standard deviation
    else:
        avg_sim, std_sim = np.nan, np.nan

    results.append({
        'degree':    k,
        'n_nodes':   len(nodes),
        'avg_sim':   avg_sim,
        'std_sim':   std_sim
    })

# Explicit columns keep set_index('degree') valid even when results is empty.
df_struct_eq = pd.DataFrame(
    results,
    columns=['degree', 'n_nodes', 'avg_sim', 'std_sim'],
).set_index('degree')
print(df_struct_eq)

        n_nodes   avg_sim   std_sim
degree                             
1            52  0.024887  0.155780
2            85  0.014006  0.087447
3           102  0.011713  0.074693
4           147  0.017380  0.084092
5           179  0.016559  0.088022
...         ...       ...       ...
492           2  0.247967  0.000000
550           3  0.441212  0.038750
743           2  0.530283  0.000000
1088          2  0.442096  0.000000
1133          2  0.806708  0.000000

[257 rows x 3 columns]
