# Swiss Dataset Metrics Evaluation

This notebook loads the complete Swiss rail network (Swisstopo data) and calculates key metrics defined in the proposal (Efficiency and Resilience).

Metrics calculated:
1. **Global Efficiency**
2. **Local Efficiency**
3. **Average Clustering Coefficient**
4. **Average Path Length** (on Largest Connected Component)
5. **Resilience** (robustness to random node failures, measured as the relative size of the largest connected component after removal)
6. **Efficiency Decay - Random Failure**
7. **Efficiency Decay - Targeted Attacks** (Degree and Betweenness Centrality)

Results are stored in `../metrics/switzerland/swiss_metrics_results.json`.

In [37]:
from pathlib import Path
import pandas as pd
import geopandas as gpd
import networkx as nx
import numpy as np
import json
import time

In [38]:
# Label for the data provider; not referenced by the cells visible in this notebook.
DATA_SOURCE = "swisstopo"
# Directory holding the Swiss input datasets, relative to the notebook's working directory.
BASE_DIR = Path("../datasets/switzerland")
# File geodatabase read with geopandas; the 'Netzsegment' and 'Netzknoten' layers are loaded from it.
SWISSTOPO_GDB_PATH = BASE_DIR / "schienennetz_2056_de.gdb"
# Destination JSON file for the computed metrics (plain string, passed to open()).
METRICS_OUTPUT_PATH = "../metrics/switzerland/swiss_metrics_results.json"

In [39]:
def flatten_lines(geom):
    """Collect the vertices of a line geometry as a flat list of (y, x) tuples.

    Each vertex is emitted with its first two ordinates swapped, i.e.
    ``(pt[1], pt[0])`` -- which is (lat, lon) when the geometry stores
    (lon, lat).

    Args:
        geom: An object exposing ``is_empty`` and ``geom_type``; ``coords``
            for a 'LineString', ``geoms`` (each with ``coords``) for a
            'MultiLineString'. May be None.

    Returns:
        A list of 2-tuples. Empty when ``geom`` is None, is empty, or is
        neither a 'LineString' nor a 'MultiLineString'.
    """
    if geom is None or geom.is_empty:
        return []

    kind = geom.geom_type
    if kind == 'LineString':
        parts = [geom]
    elif kind == 'MultiLineString':
        parts = geom.geoms
    else:
        # Any other geometry type contributes no vertices.
        parts = []

    return [(pt[1], pt[0]) for part in parts for pt in part.coords]

In [40]:
print("Loading Swisstopo data...")
net_segments = gpd.read_file(SWISSTOPO_GDB_PATH, layer='Netzsegment')
net_nodes = gpd.read_file(SWISSTOPO_GDB_PATH, layer='Netzknoten')

# Nodes are reprojected to EPSG:4326 so their x/y can be stored as lon/lat attributes.
nodes_gdf = net_nodes.to_crs(4326)
# Segments are deliberately left in their source CRS: only their end-node ids
# (topology) and geometry.length are used below, so edge weights are expressed
# in the source CRS units.
# NOTE(review): presumably EPSG:2056 (metres), going by the file name -- confirm.
segments_gdf = net_segments

G = nx.Graph()

print("Building Graph...")
for _, row in nodes_gdf.iterrows():
    node_id = row['xtf_id']
    name = row.get('Betriebspunkt_Name')
    # A missing name can surface as NaN, which is truthy, so `name or node_id`
    # would keep NaN as the label. Fall back to the id for None, NaN and ''.
    label = name if (pd.notna(name) and name) else node_id
    lat = row.geometry.y
    lon = row.geometry.x
    G.add_node(node_id, label=label, lat=lat, lon=lon, source='swisstopo')

for _, row in segments_gdf.iterrows():
    u = row['rAnfangsknoten']
    v = row['rEndknoten']
    # Skip segments with a missing endpoint reference.
    if pd.isna(u) or pd.isna(v):
        continue
    # Skip segments whose endpoints are not in the node layer, so that
    # add_edge() does not implicitly create attribute-less nodes.
    if u not in G or v not in G:
        continue

    # Edge weight is the segment's geometric length; a segment without a
    # geometry object falls back to 1.0.
    # NOTE(review): the 1.0 fallback is in the same units as real lengths and
    # would therefore act as a very short edge -- confirm this is intended.
    weight = row.geometry.length if hasattr(row.geometry, 'length') else 1.0

    # nx.Graph keeps a single edge per node pair: a repeated (u, v) overwrites
    # the previously stored weight.
    G.add_edge(u, v, weight=weight, source='swisstopo')

print(f"Graph built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

Loading Swisstopo data...
Building Graph...
Graph built: 3210 nodes, 3377 edges


In [41]:
# Restrict the analysis to the largest connected component (LCC): the path-based
# metrics computed later need a connected graph to be meaningful.
# `largest_cc` is the node set of the biggest component (ties go to the first one found).
largest_cc = max(nx.connected_components(G), key=len)
# .copy() turns the subgraph into a standalone graph object that later cells can copy and modify.
G_lcc = G.subgraph(largest_cc).copy()
print(f"Largest Connected Component: {G_lcc.number_of_nodes()} nodes, {G_lcc.number_of_edges()} edges")

Largest Connected Component: 1687 nodes, 1843 edges


In [42]:
print("Calculating Efficiency Metrics (this may take a while)...")
# perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()

# Global Efficiency (hop-based: networkx ignores edge weights here)
global_eff = nx.global_efficiency(G_lcc)
print(f"Global Efficiency: {global_eff:.4f}")

# Local Efficiency (also hop-based)
local_eff = nx.local_efficiency(G_lcc)
print(f"Local Efficiency: {local_eff:.4f}")

# Average Path Length, reported twice:
#   - weighted: shortest paths minimise the summed 'weight' edge attribute
#   - topological: shortest paths minimise the number of hops
avg_path_len = nx.average_shortest_path_length(G_lcc, weight='weight')
avg_path_len_topo = nx.average_shortest_path_length(G_lcc)
print(f"Avg Path Length (Topological): {avg_path_len_topo:.4f}")
print(f"Avg Path Length (Weighted): {avg_path_len:.4f}")

# Clustering Coefficient (unweighted average over all nodes)
avg_clustering = nx.average_clustering(G_lcc)
print(f"Avg Clustering Coefficient: {avg_clustering:.4f}")

print(f"Done in {time.perf_counter() - start_time:.2f} seconds")

Calculating Efficiency Metrics (this may take a while)...
Global Efficiency: 0.0282
Local Efficiency: 0.0233
Avg Path Length (Topological): 58.3268
Avg Path Length (Weighted): 147601.6278
Avg Clustering Coefficient: 0.0223
Done in 2.85 seconds


In [43]:
print("Calculating Resilience (Random Failures - LCC Size)...")
def calculate_robustness(graph, fraction, num_simulations=10, seed=None):
    """Estimate robustness to random node failures.

    For each simulation, ``int(n * fraction)`` nodes are removed uniformly at
    random (without replacement) from a copy of ``graph`` and the size of the
    largest remaining connected component is recorded, relative to the
    ORIGINAL node count ``n``.

    Args:
        graph: Undirected networkx graph; it is not modified.
        fraction: Share of nodes to remove, expected in [0, 1].
        num_simulations: Number of independent random removals to average.
        seed: Optional seed. When given, a dedicated
            ``np.random.default_rng(seed)`` is used so the result is
            repeatable. When None, the global NumPy RNG is used, so a prior
            ``np.random.seed(...)`` call still governs the draws.

    Returns:
        Mean relative LCC size over all simulations, as a float
        (0.0 for an empty graph).

    Raises:
        ValueError: If more nodes are requested than the graph contains
            (raised by the NumPy sampler).
    """
    nodes = list(graph.nodes())
    n = len(nodes)
    if n == 0:
        # Nothing to remove and no component to measure; avoids dividing by zero.
        return 0.0
    num_to_remove = int(n * fraction)
    rng = np.random if seed is None else np.random.default_rng(seed)

    results = []
    for _ in range(num_simulations):
        # Sample positions rather than the node labels themselves: passing the
        # labels through NumPy would coerce them into an array (e.g. mixed
        # int/str ids become strings) and removal could then silently miss nodes.
        removed_idx = rng.choice(n, size=num_to_remove, replace=False)
        G_temp = graph.copy()
        G_temp.remove_nodes_from(nodes[i] for i in removed_idx)
        if G_temp.number_of_nodes() > 0:
            largest_cc = max(nx.connected_components(G_temp), key=len)
            results.append(len(largest_cc) / n)  # relative to the original size
        else:
            results.append(0.0)
    return float(np.mean(results))

# Fractions of nodes removed in every failure/attack experiment of this notebook.
fractions = [0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
robustness_scores = {}

# The random removals draw from the global NumPy RNG; seeding it here makes
# this cell produce the same numbers every time it is (re-)run.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

for f in fractions:
    score = calculate_robustness(G_lcc, f)
    robustness_scores[f] = score
    print(f"Fraction removed: {f}, LCC Size (relative): {score:.4f}")

Calculating Resilience (Random Failures - LCC Size)...
Fraction removed: 0.0, LCC Size (relative): 1.0000
Fraction removed: 0.05, LCC Size (relative): 0.5780
Fraction removed: 0.1, LCC Size (relative): 0.2831
Fraction removed: 0.2, LCC Size (relative): 0.0529
Fraction removed: 0.3, LCC Size (relative): 0.0247
Fraction removed: 0.4, LCC Size (relative): 0.0145
Fraction removed: 0.5, LCC Size (relative): 0.0101


In [None]:
print("Calculating Efficiency Decay (Random Failure)...")
def calculate_efficiency_decay_random(graph, fraction, num_simulations=1000, seed=None):
    """Estimate global efficiency after random node failures.

    For each simulation, ``int(n * fraction)`` nodes are removed uniformly at
    random (without replacement) from a copy of ``graph`` and
    ``nx.global_efficiency`` of the remaining graph is recorded.

    Args:
        graph: Undirected networkx graph; it is not modified.
        fraction: Share of nodes to remove, expected in [0, 1].
        num_simulations: Number of independent random removals to average.
            Every simulation recomputes global efficiency, so large values
            are expensive on large graphs.
        seed: Optional seed. When given, a dedicated
            ``np.random.default_rng(seed)`` is used so the result is
            repeatable. When None, the global NumPy RNG is used, so a prior
            ``np.random.seed(...)`` call still governs the draws.

    Returns:
        Mean global efficiency over all simulations, as a float.

    Raises:
        ValueError: If more nodes are requested than the graph contains
            (raised by the NumPy sampler).
    """
    nodes = list(graph.nodes())
    n = len(nodes)
    num_to_remove = int(n * fraction)

    if num_to_remove == 0 and num_simulations > 0:
        # No node is removed, so every simulation would evaluate the exact
        # same graph: compute the (deterministic) efficiency once.
        return float(nx.global_efficiency(graph))

    rng = np.random if seed is None else np.random.default_rng(seed)

    results = []
    for _ in range(num_simulations):
        # Sample positions rather than the node labels themselves: passing the
        # labels through NumPy would coerce them into an array (e.g. mixed
        # int/str ids become strings) and removal could then silently miss nodes.
        removed_idx = rng.choice(n, size=num_to_remove, replace=False)
        G_temp = graph.copy()
        G_temp.remove_nodes_from(nodes[i] for i in removed_idx)
        results.append(nx.global_efficiency(G_temp))

    return float(np.mean(results))

# The random removals draw from the global NumPy RNG; seeding it here makes
# this cell produce the same numbers every time it is (re-)run.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Each call averages over the function's default of 1000 simulations, and every
# simulation recomputes global efficiency, so this loop can be slow; pass a
# smaller num_simulations to calculate_efficiency_decay_random for a quick run.
eff_decay_random = {}
for f in fractions:
    score = calculate_efficiency_decay_random(G_lcc, f)
    eff_decay_random[f] = score
    print(f"Fraction removed: {f}, Global Eff (Random): {score:.4f}")

Calculating Efficiency Decay (Random Failure)...


In [None]:
print("Calculating Efficiency Decay (Targeted Attacks)...")

# Static attack strategy: centralities are measured once on the intact LCC and
# the resulting rankings are reused for every removal fraction.
print("Calculating centrality measures...")
degree_cent = nx.degree_centrality(G_lcc)
betweenness_cent = nx.betweenness_centrality(G_lcc)  # costly, hence computed a single time

# Node ids ordered from most to least central (ties keep dict order).
sorted_nodes_degree = [
    node
    for node, _score in sorted(degree_cent.items(), key=lambda item: item[1], reverse=True)
]
sorted_nodes_betweenness = [
    node
    for node, _score in sorted(betweenness_cent.items(), key=lambda item: item[1], reverse=True)
]

def calculate_efficiency_decay_targeted(graph, fraction, sorted_nodes_list):
    """Global efficiency after removing the top-ranked nodes.

    Args:
        graph: networkx graph; it is not modified (a copy is attacked).
        fraction: Share of the graph's nodes to remove.
        sorted_nodes_list: Node ids in attack order; the first
            ``int(n * fraction)`` entries are removed.

    Returns:
        ``nx.global_efficiency`` of the graph left after the removal.
    """
    cutoff = int(graph.number_of_nodes() * fraction)

    attacked = graph.copy()
    attacked.remove_nodes_from(sorted_nodes_list[:cutoff])

    return nx.global_efficiency(attacked)

# Maps removal fraction -> remaining global efficiency, one dict per attack strategy.
eff_decay_degree, eff_decay_betweenness = {}, {}

for f in fractions:
    # Highest-degree nodes removed first
    eff_decay_degree[f] = score_deg = calculate_efficiency_decay_targeted(
        G_lcc, f, sorted_nodes_degree
    )

    # Highest-betweenness nodes removed first
    eff_decay_betweenness[f] = score_bet = calculate_efficiency_decay_targeted(
        G_lcc, f, sorted_nodes_betweenness
    )

    print(f"Fraction removed: {f} | Degree Target Eff: {score_deg:.4f} | Betweenness Target Eff: {score_bet:.4f}")

Calculating Efficiency Decay (Targeted Attacks)...
Calculating centrality measures...
Fraction removed: 0.0 | Degree Target Eff: 0.0282 | Betweenness Target Eff: 0.0282
Fraction removed: 0.05 | Degree Target Eff: 0.0066 | Betweenness Target Eff: 0.0191
Fraction removed: 0.1 | Degree Target Eff: 0.0039 | Betweenness Target Eff: 0.0148
Fraction removed: 0.2 | Degree Target Eff: 0.0021 | Betweenness Target Eff: 0.0088
Fraction removed: 0.3 | Degree Target Eff: 0.0020 | Betweenness Target Eff: 0.0063
Fraction removed: 0.4 | Degree Target Eff: 0.0019 | Betweenness Target Eff: 0.0052
Fraction removed: 0.5 | Degree Target Eff: 0.0020 | Betweenness Target Eff: 0.0049


In [None]:
# Everything reported here was computed on the largest connected component.
# The per-fraction dicts are keyed by float; json.dump writes those keys as strings.
metrics = {
    "num_nodes": G_lcc.number_of_nodes(),
    "num_edges": G_lcc.number_of_edges(),
    "global_efficiency": global_eff,
    "local_efficiency": local_eff,
    "average_path_length_topological": avg_path_len_topo,
    "average_path_length_weighted": avg_path_len,
    "average_clustering_coefficient": avg_clustering,
    "robustness_random_failure": robustness_scores,
    "efficiency_decay_random_failure": eff_decay_random,
    "efficiency_decay_targeted_degree": eff_decay_degree,
    "efficiency_decay_targeted_betweenness": eff_decay_betweenness
}

# Create the output directory first: open(..., 'w') raises FileNotFoundError
# when the parent folder does not exist yet (e.g. on a fresh checkout).
output_path = Path(METRICS_OUTPUT_PATH)
output_path.parent.mkdir(parents=True, exist_ok=True)

with output_path.open('w', encoding='utf-8') as out_file:
    json.dump(metrics, out_file, indent=4)

print(f"Metrics saved to {METRICS_OUTPUT_PATH}")

Metrics saved to swiss_metrics_results.json
