In [12]:
import networkx as nx
import pandas as pd
import numpy as np
import os

# 1. Defining Parameters

In [13]:
# Folder that holds the exported graph, and the GraphML file inside it.
BASE_PATH = r"./Final_Graph"
GRAPHML_FILENAME = "ml_ready_graph.graphml"
GRAPHML_PATH = os.path.join(BASE_PATH, GRAPHML_FILENAME)

# 2. Graph Loading

In [14]:
# Read the GraphML file at GRAPHML_PATH into a NetworkX graph; every later cell works on G.
G = nx.read_graphml(GRAPHML_PATH)

# 3. Calculating Metrics

In [15]:
# Overall size of the loaded graph.
total_nodes, total_edges = G.number_of_nodes(), G.number_of_edges()
print(f"Number of Nodes: {total_nodes}")
print(f"Number of Edges: {total_edges}")

Number of Nodes: 48739
Number of Edges: 73076


## 3.1. Number of Connected Components

In [16]:
# Number of connected components in G (reported in the summary print further down).
num_components = nx.number_connected_components(G)

## 3.2. Density

In [17]:
# Edge density of G as computed by networkx (reported in the summary print further down).
density = nx.density(G)

## 3.3. Clustering Coefficient

In [18]:
# Average clustering is computed on a plain nx.Graph copy of G rather than on G itself.
# NOTE(review): presumably G may load as a multigraph, which average_clustering
# does not accept -- confirm G's type; the copy is redundant if G is already simple.
G_simple = nx.Graph(G)
clustering_global = nx.average_clustering(G_simple)

In [19]:
# Report the three global metrics computed above, one per line.
summary_lines = (
    f"Number of Connected Components: {num_components}",
    f"Graph Density: {density:.6f}",
    f"Mean Clustering Coefficient: {clustering_global:.4f}",
)
print("\n".join(summary_lines))

Number of Connected Components: 1
Graph Density: 0.000062
Mean Clustering Coefficient: 0.0354


## 3.4. Degree Centrality

In [20]:
# Per-node degree centrality of G; the result is indexed by node when the table is built.
degree_centrality = nx.degree_centrality(G)

## 3.5. Betweenness Centrality

In [21]:
# Estimate betweenness centrality from a random sample of k source nodes instead
# of running shortest paths from every node. k targets 10% of the graph, kept
# within [500, 5000], and is additionally capped at the node count: requesting
# more samples than there are nodes makes networkx raise ValueError, which the
# lower bound alone would trigger on any graph with fewer than 500 nodes.
BETWEENNESS_SAMPLE_FRACTION = 0.10
BETWEENNESS_MIN_SAMPLES = 500
BETWEENNESS_MAX_SAMPLES = 5000

num_nodes = G.number_of_nodes()
K_SAMPLES = min(
    num_nodes,
    BETWEENNESS_MAX_SAMPLES,
    max(BETWEENNESS_MIN_SAMPLES, int(num_nodes * BETWEENNESS_SAMPLE_FRACTION)),
)

print(f"Sampling k={K_SAMPLES} for Betweenness Calculation.")

betweenness_centrality = nx.betweenness_centrality(
    G,
    k=K_SAMPLES,
    weight='length',  # shortest paths are weighted by the 'length' edge attribute
    normalized=True,
    seed=42,  # fixed seed so the sampled node set is the same on every run
)

Sampling k=4873 for Betweenness Calculation.


In [22]:
# Exact (unsampled) alternative to the k-sampled estimate in the previous cell; left disabled.
# betweenness_centrality = nx.betweenness_centrality(G, weight='length')

## 3.6. Closeness Centrality

In [24]:
# Per-node closeness centrality of G, with the 'length' edge attribute used as the
# path distance (the same attribute the betweenness cell passes as its weight).
closeness_centrality = nx.closeness_centrality(
    G,
    distance='length'
)

In [26]:
# Assemble one row per node: its id followed by the three centrality scores.
# The node list is materialised once so every column shares the same ordering.
node_ids = list(G.nodes())
centrality_by_column = {
    'degree_centrality': degree_centrality,
    'betweenness_centrality': betweenness_centrality,
    'closeness_centrality': closeness_centrality,
}

table_columns = {'node_id': node_ids}
for column_name, scores in centrality_by_column.items():
    table_columns[column_name] = [scores[node] for node in node_ids]

nodes_df = pd.DataFrame(table_columns)

In [27]:
# Preview the first 10 rows of the per-node centrality table.
nodes_df.head(10)

Unnamed: 0,node_id,degree_centrality,betweenness_centrality,closeness_centrality
0,"(333543.4992779681, 7398639.456472073)",6.2e-05,0.001009,0.01399
1,"(333505.9441709011, 7398611.718505675)",6.2e-05,0.000257,0.013798
2,"(333532.3047175107, 7398555.910928236)",6.2e-05,0.001402,0.014185
3,"(333615.75092158274, 7398689.472791225)",6.2e-05,0.000728,0.013799
4,"(333679.6703566854, 7398681.769958133)",6.2e-05,6.3e-05,0.013793
5,"(333668.34047255793, 7398706.38338146)",6.2e-05,4.1e-05,0.01361
6,"(333742.5070595381, 7398583.127264978)",6.2e-05,7.5e-05,0.01398
7,"(333654.73178283765, 7398480.01810501)",6.2e-05,0.000174,0.014582
8,"(333678.4853853023, 7398487.375790649)",6.2e-05,0.000114,0.014375
9,"(333625.93611473066, 7398478.683431058)",6.2e-05,2.1e-05,0.014378


In [28]:
# Mean degree centrality across all nodes.
np.fromiter(degree_centrality.values(), dtype=float).mean()

np.float64(6.15262498613826e-05)

In [30]:
# Mean closeness centrality across all nodes.
np.fromiter(closeness_centrality.values(), dtype=float).mean()

np.float64(0.014129446842845774)

In [29]:
# Mean (sampled) betweenness centrality across all nodes.
np.fromiter(betweenness_centrality.values(), dtype=float).mean()

np.float64(0.0014920473856168252)