# 01 Data Preparation

Load and clean the OSM-derived grid data, then package it for remote runs.


In [None]:
from pathlib import Path
import sys
import pypsa
import pandas as pd


def find_repo_root(max_up=6):
    """Locate the repository root by walking up from the working directory.

    Starting at the resolved current working directory, at most ``max_up``
    directories (the start directory, then successive parents) are inspected.
    The first one containing a ``README.md`` or a ``.git`` entry is returned.

    Args:
        max_up: Maximum number of directory levels to inspect, including the
            starting directory itself.

    Returns:
        The first matching directory as a resolved ``Path``; if none of the
        inspected levels matches, the resolved current working directory.
    """
    start = Path.cwd().resolve()
    markers = ('README.md', '.git')
    # `start.parents` ends at the filesystem root, so the walk stops there
    # even when `max_up` would allow more levels.
    for _, candidate in zip(range(max_up), (start, *start.parents)):
        if any((candidate / marker).exists() for marker in markers):
            return candidate
    return start

# Resolve the repository root once; every path below is built relative to it.
repo_root = find_repo_root()
# Put <repo>/src on sys.path (at position 1, i.e. right after the first entry)
# before importing the project package below.
src_path = repo_root / 'src/'
if str(src_path) not in sys.path:
    sys.path.insert(1, str(src_path))
print(f"Using src path: {src_path}")
print(f"Repository root: {repo_root}")

# Must come after the sys.path tweak above — presumably the package lives
# under src/ and is not installed into the environment (TODO confirm).
import pypsa_simplified as ps

# NOTE(review): `src_path` is rebound here to the scripts directory, so after
# this cell the name no longer refers to src/. Consider a separate name.
src_path = repo_root / 'scripts/'
if str(src_path) not in sys.path:
    sys.path.insert(1, str(src_path))

def ifjoin(n: pypsa.Network) -> bool:
    """Report whether the network's name carries the ``[join]`` tag.

    Args:
        n: Network whose ``name`` attribute is inspected (converted with
            ``str`` first).

    Returns:
        True if the literal substring ``[join]`` occurs in the name.
    """
    network_name = str(n.name)
    return network_name.find("[join]") != -1

def iffloat(n: pypsa.Network) -> bool:
    """Report whether the network's name carries the ``[float]`` tag.

    Args:
        n: Network whose ``name`` attribute is inspected (converted with
            ``str`` first).

    Returns:
        True if the literal substring ``[float]`` occurs in the name.
    """
    network_name = str(n.name)
    return network_name.find("[float]") != -1


Using src path: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model/src
Repository root: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model


## Data sources and parsing
- **OSM prebuilt electricity network** (`data/raw/OSM Prebuilt Electricity Network/`): buses, lines, links, converters, transformers.
- **Custom CSV parsing**: `prepare_osm_source` uses a geometry-safe loader (handles commas inside WKT) to keep column counts correct.
- **Endpoint extraction**: First/last coordinates are pulled from WKT to map line/link endpoints to buses (tolerance 1e-5 degrees).
- **Country filter**: Defaults to DE/FR/PL/AT/IT; adjust via `countries` if needed.


In [None]:
from pypsa_simplified import prepare_osm_source

# Parse the raw OSM prebuilt network files into one table per component.
osm_dir = repo_root / "data" / "raw" / "OSM Prebuilt Electricity Network"
sources = prepare_osm_source(osm_dir)

# Quick sanity check: show the shape of every loaded table; entries without a
# `.shape` attribute (e.g. components that were not loaded) are shown as-is.
print({k: v.shape if hasattr(v, 'shape') else v for k, v in sources.items()})

# NOTE(review): `RawData` here is an *instance* of ps.data_prep.RawData, not
# the class; the name is kept because later cells pass it to build_network.
RawData = ps.data_prep.RawData(sources)

{'buses': (6737, 10), 'lines': (8994, 16), 'converters': (67, 7), 'links': (38, 10), 'transformers': (875, 8), 'generators': None, 'loads': None, 'storage': None}


In [3]:
# Country codes used as the `countries` option when building the network.
# NOTE(review): despite the name, the list is not exactly the 27 EU members:
# 'EL' and 'GR' both appear (both are codes used for Greece), 'CY' is absent,
# and non-EU codes are appended below — confirm this is intentional.
EU27 = [
    'AT', 'BE', 'BG', 'HR', 'CZ',
    'DK', 'EE', 'EL', 'FI', 'FR',
    'DE', 'GR', 'HU', 'IE', 'IT',
    'LV', 'LT', 'LU', 'MT', 'NL',
    'PL', 'PT', 'RO', 'SK', 'SI',
    'ES', 'SE',
]
# Include UK, CH, NO for practical purposes ('UK' and 'GB' are both listed).
EU27.extend(['UK', 'CH', 'NO', 'GB'])

## Create a network
Build the first network from the `RawData` object created above, using the options defined in the next cell.

In [4]:
# Hourly time index from 2021-01-01 00:00 through 2024-12-31 23:00.
snapshots = pd.date_range(start="2021-01-01", end="2024-12-31 23:00", freq="h")

# Options passed to ps.build_network. The bracketed tags in `name` are what
# the ifjoin/iffloat helpers look for when the export file name is chosen.
# NOTE(review): the meaning of the carrier options and of `join` is defined
# inside pypsa_simplified and is not visible from this notebook.
NETWORK_METADATA = dict(
    name='Simplified European Electricity Network [national] [float]',
    snapshots=snapshots,
    countries=EU27,
    generation_carriers=None,
    transmission_carriers=None,
    join=False,
)

In [5]:
# Start from an empty PyPSA network and let the project builder populate it
# from the parsed raw data according to NETWORK_METADATA.
n = ps.build_network(pypsa.Network(), RawData, options=NETWORK_METADATA)

Adding 6295 buses...
Adding 8385 lines...
Adding 812 transformers...
Adding 67 converters...
Adding 37 links...


In [6]:
# Display the network summary (component counts and number of snapshots).
n

PyPSA Network 'Simplified European Electricity Network [national] [float]'
--------------------------------------------------------------------------
Components:
 - Bus: 6359
 - Carrier: 15
 - Line: 8385
 - Link: 103
 - Transformer: 812
Snapshots: 35064

In [None]:
# Derive the file-name suffixes from the tags embedded in the network name.
# Requires the setup cell (which defines ifjoin/iffloat and repo_root) to have
# been run in the current kernel.
join = ifjoin(n)
float_ = iffloat(n)

# Build the suffixes outside the f-string: reusing the same quote character
# inside a replacement field is a SyntaxError before Python 3.12.
join_suffix = "_join" if join else ""
float_suffix = "_f" if float_ else ""
network_path = repo_root / "data" / "networks" / "base" / f"sEEN{join_suffix}{float_suffix}.nc"

# Make sure the target directory exists so the export works on a fresh clone.
network_path.parent.mkdir(parents=True, exist_ok=True)

n.export_to_netcdf(network_path)

NameError: name 'ifjoin' is not defined

In [None]:
# Re-load the network from the file written by the export cell; this rebinds
# `n` to the imported copy.
n = pypsa.Network(network_path)

INFO:pypsa.network.io:Imported network 'Simplified European Electricity Network [join] [float]' has buses, carriers, lines, links, loads, transformers


In [10]:
# Show the network name; its bracketed tags are what ifjoin/iffloat inspect.
n.name

'Simplified European Electricity Network [national] [float]'

### Next
- Use `notebooks/main.ipynb` to transfer the artifact and trigger the remote optimization.
- For custom country lists or tolerance, pass `countries`/`tol` to `prepare_osm_source`.
