## 1. Setup and Imports

In [1]:
from pathlib import Path
import sys
import pypsa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import importlib

# Locate the repository root
def find_repo_root(max_up=6):
    """Walk upward from the current working directory to find the repo root.

    A directory counts as the root when it contains ``README.md`` or ``.git``.
    At most ``max_up`` directories are inspected, starting with the working
    directory itself; the walk also stops at the filesystem root.

    Returns the first matching directory, or the resolved working directory
    when no directory matched.
    """
    markers = ('README.md', '.git')
    start = Path.cwd().resolve()
    candidate = start
    for _level in range(max_up):
        if any((candidate / marker).exists() for marker in markers):
            return candidate
        parent = candidate.parent
        if parent == candidate:
            # Filesystem root reached without finding a marker.
            break
        candidate = parent
    return start

# Resolve the repository root once; every path below is derived from it.
repo_root = find_repo_root()

# Add paths: make <repo_root>/src and <repo_root>/scripts importable.
# Entries are inserted at index 1 so sys.path[0] keeps its position, and the
# membership check avoids duplicate entries when this cell is re-run.
# Because both are inserted at index 1 in turn, 'scripts/' ends up ahead of
# 'src/' when both are newly added.
for path in ['src/', 'scripts/']:
    full_path = str(repo_root / path)
    if full_path not in sys.path:
        sys.path.insert(1, full_path)

# Project-local module; it can only be imported after the sys.path entries
# above exist (presumably it lives in src/ or scripts/ - confirm).
import network_clust as netclust

# Project directories: figures are saved under FIG_PATH; networks are loaded
# from and saved to sub-folders of NETWORK_PATH.
FIG_PATH = repo_root / 'results' / 'figures'
NETWORK_PATH = repo_root / 'data' / 'networks'

print(f"Repository root: {repo_root}")
print(f"Figures: {FIG_PATH}")

Repository root: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model
Figures: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model/results/figures


## 2. Load Network

In [2]:
# Configuration: each flag selects a variant of the pre-built base network by
# adding a suffix to the file name assembled below.
JOIN = True   # Use joined network (connects floating buses); adds '_join'
FLOAT_ = True  # Include floating buses; adds '_f'

# Load network from <NETWORK_PATH>/base/. With both flags enabled the file
# name resolves to 'S+_sEEN_join_f.nc'.
network_file = f"S+_sEEN{'_join' if JOIN else ''}{'_f' if FLOAT_ else ''}.nc"
n = pypsa.Network(NETWORK_PATH / 'base' / network_file)

# Size summary of the loaded network; countries are counted as the distinct
# values of the buses' 'country' column.
print(f"Loaded: {network_file}")
print(f"  Buses: {len(n.buses)}")
print(f"  Lines: {len(n.lines)}")
print(f"  Links: {len(n.links)}")
print(f"  Countries: {len(n.buses.country.unique())}")

INFO:pypsa.network.io:Imported network 'Simplified European Electricity Network [join] [float] [simple]' has buses, carriers, lines, links, loads, sub_networks


Loaded: S+_sEEN_join_f.nc
  Buses: 3954
  Lines: 5485
  Links: 28
  Countries: 28


## 3. Calculate Cluster Weights

We use total load per bus as the clustering weight. This ensures high-load buses are well-represented in the clustered network.

In [3]:
# Calculate load weights
# Sum each load's time series over all snapshots, then aggregate the per-load
# totals onto the bus each load is attached to.
# NOTE(review): the raw p_set values are summed without applying snapshot
# weightings; the 'TWh' label below assumes MW values at hourly resolution -
# confirm against the input data.
load_per_bus = n.loads_t.p_set.sum(axis=0)
bus_loads = load_per_bus.groupby(n.loads.bus).sum()

# Every bus gets a weight; buses without any load keep a weight of 0.
bus_weights = pd.Series(0.0, index=n.buses.index)
bus_weights.loc[bus_loads.index] = bus_loads

# Country statistics
# Aggregate the per-country load once and reuse it for both derived columns.
country_load = bus_weights.groupby(n.buses.country).sum()
country_stats = pd.DataFrame({
    'buses': n.buses.groupby('country').size(),
    'load_TWh': country_load / 1e6,
    'load_pct': (country_load / bus_weights.sum() * 100).round(2),
}).sort_values('load_TWh', ascending=False)

print("Top 15 Countries by Load:")
print(country_stats.head(15))
print(f"\nTotal: {len(n.buses)} buses, {bus_weights.sum()/1e6:.2f} TWh")

Top 15 Countries by Load:
         buses     load_TWh  load_pct
country                              
DE         479  2161.531044     17.98
FR         785  1770.646688     14.73
IT         417  1581.725234     13.16
ES         561  1209.768985     10.06
PL         147   976.116074      8.12
RO          91   510.082350      4.24
NL          38   435.799127      3.62
BE          41   296.187696      2.46
GR          26   280.505793      2.33
SE         132   275.064118      2.29
CZ          40   274.739971      2.29
HU          38   262.842586      2.19
AT          60   258.088610      2.15
PT          88   256.368640      2.13
CH         126   245.099008      2.04

Total: 3954 buses, 12022.23 TWh


## 4. Configure Clustering

### Parameters:
- **n_clusters_target**: Total number of clusters (200-500 typical)
- **focus_weights**: Override proportional allocation for specific countries
- **min_clusters_per_country**: Minimum number of clusters assigned to each country's main sub-network (prevents line loss)

### Focus Weights
Focus weights ensure peripheral countries get adequate representation:
- Specified countries get their stated fraction of total clusters
- Remaining countries share (1 - sum(focus_weights)) proportionally by load
- **PyPSA-Eur method**: Each country's weight is divided across that country's sub-networks

In [9]:
# Clustering configuration
# Total number of clusters requested for the whole network. This value was
# increased from an earlier setting to retain more lines after clustering.
n_clusters_target = 500  

# Focus weights for underrepresented countries: the value per country is the
# fraction of all clusters that country is meant to receive.
# NOTE(review): the recorded allocation does not match these fractions (e.g.
# ES is set to 0.06 but received 12 of 500 clusters) - confirm how
# network_clust applies focus weights.
focus_weights = {'PL': 0.06,  # Poland - important transit country
                 'ES': 0.06,  # Spain - Iberian peninsula
                 'RO': 0.05,  # Romania - SE Europe
                 'SE': 0.05,  # Sweden - Nordic region
                 'PT': 0.03,  # Portugal
                 'GR': 0.03,  # Greece
                 'GB': 0.06,  # UK - many sub-networks, needs boost
                 'NO': 0.04,  # Norway
                }

# Minimum clusters per country's main sub-network. This value was also
# increased so that main grids get enough clusters.
min_clusters_per_country = 5

# Echo the configuration so the settings are recorded next to the results.
print(f"Configuration:")
print(f"Target clusters: {n_clusters_target}")
print(f"Focus countries: {list(focus_weights.keys())}")
print(f"Total focus weight: {sum(focus_weights.values()):.0%}")
print(f"Min clusters/country: {min_clusters_per_country}")


Configuration:
Target clusters: 500
Focus countries: ['PL', 'ES', 'RO', 'SE', 'PT', 'GR', 'GB', 'NO']
Total focus weight: 38%
Min clusters/country: 5


## 5. Run Clustering

In [10]:
# Reload module to get latest changes: re-imports network_clust so edits made
# to the module after its first import take effect without a kernel restart.
importlib.reload(netclust)

# Determine network topology (required for sub-network identification)
n.determine_network_topology()

# Step 1: Distribute clusters across countries using Gurobi optimization
# Inputs are the target cluster count, the per-bus load weights, the focus
# weights and the per-country minimum defined in the configuration cell.
# The solver is fixed to 'gurobi' here, so a working Gurobi installation and
# licence are needed to run this cell.
print("="*70)
print("Step 1: Distributing clusters to countries (Gurobi optimization)")
print("="*70)

n_clusters_c = netclust.distribute_n_clusters_to_countries(
    n,
    n_clusters_target,
    bus_weights,
    focus_weights=focus_weights,
    solver_name='gurobi',
    min_clusters_per_country=min_clusters_per_country
)

# Display allocation summary: the result is indexed by (country, sub_network)
# per the logged output, so it is summed over the 'country' level to get one
# total per country.
country_totals = n_clusters_c.groupby(level='country').sum().sort_values(ascending=False)
print(f"\nCluster allocation by country (top 15):")
print(country_totals.head(15))

INFO:network_clust:Distributing 500 clusters across countries using gurobi
INFO:network_clust:Applying focus weights for 8 countries
INFO:network_clust:Using custom focus weights for determining number of clusters.
INFO:network_clust:Ensuring minimum 5 clusters for main sub-networks
INFO:linopy.model: Solve problem using Gurobi solver
INFO:linopy.model:Solver options:
 - LogToConsole: 0
 - TimeLimit: 60
 - MIPGap: 0.01
INFO:linopy.io: Writing time: 0.01s


Step 1: Distributing clusters to countries (Gurobi optimization)
Set parameter Username
Set parameter LicenseID to value 2755728
Academic license - for non-commercial use only - expires 2026-12-16
Read LP format model from file /private/var/folders/y8/4_9g68pj7k136q2yypgp5ysc0000gn/T/linopy-problem-qxamsq86.lp
Reading time = 0.00 seconds
obj: 1 rows, 95 columns, 95 nonzeros
Set parameter LogToConsole to value 0


INFO:linopy.constants: Optimization successful: 
Status: ok
Termination condition: optimal
Solution: 95 primals, 0 duals
Objective: -1.41e+04
Solver model: available
Solver message: 2

INFO:network_clust:Optimization successful. Clusters per country (top 10):
country
DE    82
FR    69
IT    61
GB    21
NL    20
DK    18
PL    17
PT    16
SE    15
BE    14
Name: n, dtype: int64
INFO:network_clust:UK cluster allocation:
country  sub_network
GB       14             1
         16             1
         2              6
         23             1
         3              4
         36             1
         44             1
         51             1
         55             1
         57             1
         58             1
         62             1
         7              1
Name: n, dtype: int64



Cluster allocation by country (top 15):
country
DE    82
FR    69
IT    61
GB    21
NL    20
DK    18
PL    17
PT    16
SE    15
BE    14
RO    12
ES    12
AT    11
NO    11
HU    11
Name: n, dtype: int64


In [11]:
# Step 2: Create busmap using K-means clustering
# The busmap assigns every original bus to a cluster; it is built from the
# per-country cluster counts of step 1 and the per-bus load weights.
# NOTE(review): whether the k-means run is seeded (and therefore reproducible)
# depends on network_clust - confirm.
print("="*70)
print("Step 2: Creating busmap (K-means clustering)")
print("="*70)

busmap = netclust.busmap_for_n_clusters(
    n,
    n_clusters_c,
    bus_weights,
    algorithm="kmeans"
)

# Sanity check: the number of distinct clusters should equal the target.
print(f"\nBusmap created:")
print(f"  Original buses: {len(n.buses)}")
print(f"  Unique clusters: {busmap.nunique()}")

INFO:network_clust:Creating busmap using kmeans algorithm


Step 2: Creating busmap (K-means clustering)


INFO:network_clust:Created busmap with 500 unique clusters



Busmap created:
  Original buses: 3954
  Unique clusters: 500


In [12]:
# Step 3: Apply clustering to create aggregated network
print("="*70)
print("Step 3: Applying clustering")
print("="*70)

# The clustering result exposes the aggregated network as its `.n` attribute.
clustering = netclust.clustering_for_n_clusters(n, busmap)
n_clustered = clustering.n

# Report the size reduction relative to the original network.
# The "speedup" figure is only a rough indicator: it is the squared ratio of
# bus counts, not a measured runtime.
print(f"\nClustering complete:")
print(f"  Buses: {len(n.buses)} → {len(n_clustered.buses)} ({100*(1-len(n_clustered.buses)/len(n.buses)):.1f}% reduction)")
print(f"  Lines: {len(n.lines)} → {len(n_clustered.lines)} ({100*(1-len(n_clustered.lines)/len(n.lines)):.1f}% reduction)")
print(f"  Links: {len(n.links)} → {len(n_clustered.links)}")
print(f"  Computational speedup: ~{(len(n.buses)/len(n_clustered.buses))**2:.0f}x")

INFO:network_clust:Performing network clustering


Step 3: Applying clustering


INFO:network_clust:Clustering complete:
  Buses: 3954 -> 500
  Lines: 5485 -> 783
  Links: 28 -> 28



Clustering complete:
  Buses: 3954 → 500 (87.4% reduction)
  Lines: 5485 → 783 (85.7% reduction)
  Links: 28 → 28
  Computational speedup: ~63x


## 6. Analyze Internal Line Retention

Internal lines (within a country) should be preserved during clustering. Low retention indicates clustering is collapsing too many buses.

In [13]:
def count_internal_lines(network):
    """Tally buses and internal lines for every country in ``network``.

    A line is internal to a country when both its ``bus0`` and ``bus1`` belong
    to buses of that country.

    Returns a DataFrame indexed by ``country`` (sorted) with the columns
    ``buses`` and ``internal_lines``.
    """
    country_of_bus = network.buses['country']
    lines = network.lines
    rows = []
    for code in sorted(country_of_bus.unique()):
        members = country_of_bus.index[country_of_bus == code]
        # Keep only lines whose two end buses are both in this country.
        both_ends_inside = lines['bus0'].isin(members) & lines['bus1'].isin(members)
        rows.append({
            'country': code,
            'buses': len(members),
            'internal_lines': int(both_ends_inside.sum()),
        })
    return pd.DataFrame(rows).set_index('country')

# Compare before/after: per-country bus and internal-line counts for the
# original and the clustered network.
original = count_internal_lines(n)
clustered = count_internal_lines(n_clustered)

comparison = pd.DataFrame({
    'buses_before': original['buses'],
    'buses_after': clustered['buses'],
    'lines_before': original['internal_lines'],
    'lines_after': clustered['internal_lines'],
})
# Share of internal lines that survive clustering, in percent. NaN results
# (e.g. 0 / 0 for countries without internal lines) are reported as 0.
comparison['retention_%'] = (comparison['lines_after'] / comparison['lines_before'] * 100).fillna(0).round(1)

# Show countries with significant networks (>= 50 original lines)
significant = comparison[comparison['lines_before'] >= 50].sort_values('lines_before', ascending=False)

print("Internal Line Retention (countries with 50+ original lines):")
print(significant)

# Highlight any problem countries: those keeping fewer than 10% of their
# internal lines.
problems = significant[significant['retention_%'] < 10]
if len(problems) > 0:
    print(f"\n⚠️  WARNING: {len(problems)} countries with <10% retention:")
    print(problems[['lines_before', 'lines_after', 'retention_%']])

Internal Line Retention (countries with 50+ original lines):
         buses_before  buses_after  lines_before  lines_after  retention_%
country                                                                   
FR                785           69          1175          128         10.9
ES                561           12           783           13          1.7
DE                479           82           654          143         21.9
IT                417           61           558           92         16.5
GB                314           21           409           13          3.2
NO                217           11           249            6          2.4
PL                147           17           224           28         12.5
SE                132           15           171            9          5.3
CH                126           10           165           13          7.9
PT                 88           16           141           30         21.3
RO                 91           12     

In [None]:
# Analyze UK specifically
# This cell's source had been collapsed onto a single line with literal "\n"
# and "\"" escapes, which turned the whole cell into one comment that did
# nothing when run. It is restored here as real statements.
uk_buses_orig = n.buses[n.buses['country'] == 'GB']
uk_buses_clust = n_clustered.buses[n_clustered.buses['country'] == 'GB']

print("UK Sub-Network Analysis:")
print("="*60)
print(f"\nOriginal UK buses per sub-network:")
print(uk_buses_orig.groupby('sub_network').size().sort_values(ascending=False))

# NOTE(review): this relies on a 'sub_network' column on the clustered buses;
# whether it is present and up to date after clustering depends on
# network_clust - confirm, or run n_clustered.determine_network_topology()
# beforehand.
print(f"\nClustered UK buses per sub-network:")
print(uk_buses_clust.groupby('sub_network').size().sort_values(ascending=False))

# Internal lines: both end buses belong to the clustered GB buses.
print(f"\nUK internal lines:")
uk_buses_clust_idx = uk_buses_clust.index
uk_internal_lines = n_clustered.lines[
    n_clustered.lines['bus0'].isin(uk_buses_clust_idx) &
    n_clustered.lines['bus1'].isin(uk_buses_clust_idx)
]
print(f"  Total: {len(uk_internal_lines)}")
print(f"\n  Line details:")
for idx, row in uk_internal_lines.iterrows():
    print(f"    {row['bus0']} <-> {row['bus1']} (s_nom={row['s_nom']:.0f} MW)")

## 7. Visualize Results

In [None]:
# Create comparison plot: original network on the left, clustered network on
# the right, both on a plate carree map projection.
fig = plt.figure(figsize=(18, 8))
proj = ccrs.PlateCarree()

# Original network
ax1 = fig.add_subplot(1, 2, 1, projection=proj)
ax1.set_title(f'Simplified Network ({len(n.buses)} buses)', fontsize=14, fontweight='bold')
n.plot(ax=ax1, bus_sizes=0.01, line_widths=0.3, link_widths=0.5, margin=0.05)

# Clustered network (larger markers and wider lines, since there are far
# fewer elements to draw)
ax2 = fig.add_subplot(1, 2, 2, projection=proj)
ax2.set_title(f'Clustered Network ({len(n_clustered.buses)} buses)', fontsize=14, fontweight='bold')
n_clustered.plot(ax=ax2, bus_sizes=0.1, line_widths=0.5, link_widths=1.0, margin=0.05)

plt.tight_layout()

# Save figure
# The file name encodes the network variant and the cluster target; the
# 'gurobi' part is fixed text, matching the solver used for the allocation.
fig_name = f'network_clustering_{"join" if JOIN else "nojoin"}_{"float" if FLOAT_ else "nofloat"}_gurobi_{n_clusters_target}.png'
# Create the output directory first so saving does not fail when
# results/figures does not exist yet (e.g. on a fresh checkout).
FIG_PATH.mkdir(parents=True, exist_ok=True)
fig.savefig(FIG_PATH / fig_name, dpi=300, bbox_inches='tight')
print(f"Saved: {fig_name}")

plt.show()

## 8. Save Clustered Network

In [None]:
# Build filename: mirrors the base network's naming, with a 'C+' prefix, the
# cluster target and a fixed 'gurobi' tag.
save_name = f"C+_sEEN{'_join' if JOIN else ''}{'_f' if FLOAT_ else ''}_cl{n_clusters_target}_gurobi.nc"
save_path = NETWORK_PATH / 'clustered' / save_name

# Save
# Create the target directory first so the export does not fail when
# data/networks/clustered does not exist yet.
save_path.parent.mkdir(parents=True, exist_ok=True)
n_clustered.export_to_netcdf(save_path)
print(f"Saved clustered network: {save_path}")
print(f"  Buses: {len(n_clustered.buses)}")
print(f"  Lines: {len(n_clustered.lines)}")
print(f"  Links: {len(n_clustered.links)}")