# 01 Data Preparation

Load and clean the OSM-derived grid data, then package it for remote runs.


In [1]:
from pathlib import Path
import sys
import os
import pypsa
import numpy as np

def find_repo_root(max_up=6):
    """Locate the repository root by walking upward from the working directory.

    Starting at the resolved current working directory, inspect at most
    ``max_up`` directories (the start directory included). The first one that
    contains a ``README.md`` or a ``.git`` entry is returned.

    Parameters
    ----------
    max_up : int
        Maximum number of directories to inspect on the way up.

    Returns
    -------
    pathlib.Path
        The first directory holding a marker, or the resolved current working
        directory when no marker is found within ``max_up`` levels or the
        filesystem root is reached first.
    """
    origin = Path.cwd().resolve()
    here = origin
    for _step in range(max_up):
        # README.md is probed before .git; either one marks the root.
        if any((here / marker).exists() for marker in ('README.md', '.git')):
            return here
        above = here.parent
        if above == here:
            # A path that is its own parent is the filesystem root: give up.
            return origin
        here = above
    return origin

# Resolve the repository root and make the project's `src` directory importable.
repo_root = find_repo_root()
src_path = repo_root / 'src/'
if sys.path.count(str(src_path)) == 0:
    # Insert at index 1 so the existing first sys.path entry keeps priority.
    sys.path.insert(1, str(src_path))
print(f"Using src path: {src_path}")
print(f"Repository root: {repo_root}")

import pypsa_simplified as ps

Using src path: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model/src
Repository root: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model


## Data sources and parsing
- **OSM prebuilt electricity network** (`data/raw/OSM Prebuilt Electricity Network/`): buses, lines, links, converters, transformers.
- **Custom CSV parsing**: `prepare_osm_source` uses a geometry-safe loader (handles commas inside WKT) to keep column counts correct.
- **Endpoint extraction**: First/last coordinates are pulled from WKT to map line/link endpoints to buses (tolerance 1e-5 degrees).
- **Country filter**: Defaults to DE/FR/PL/AT/IT; adjust via `countries` if needed.


In [2]:
from IPython.core.display import HTML
import pandas as pd
from pypsa_simplified import prepare_osm_source
from pypsa_simplified import prepare_generator_data

# Parse the prebuilt OSM network tables found under data/raw.
osm_dir = repo_root / "data" / "raw" / "OSM Prebuilt Electricity Network"
sources = prepare_osm_source(osm_dir)
# Quick overview: table shape where the entry has one, otherwise the raw value.
print({k: v.shape if hasattr(v, 'shape') else v for k, v in sources.items()})

# Wrap the parsed tables in the project's RawData container.
RawData = ps.data_prep.RawData(sources)

# Attach the power plant table under the 'generators' key of the container's data.
source = prepare_generator_data(repo_root / "data" / "raw" / "powerplants.csv")

RawData.data['generators'] = source

{'buses': (6737, 10), 'lines': (8994, 16), 'converters': (67, 7), 'links': (38, 10), 'transformers': (875, 8), 'generators': None, 'loads': None, 'storage': None}


In [11]:
# Display the table stored under the 'generators' key of RawData.data.
RawData.data['generators']

Unnamed: 0,id,Name,Fueltype,Technology,Set,Country,Capacity,Efficiency,DateIn,DateRetrofit,DateOut,lat,lon,Duration,Volume_Mm3,DamHeight_m,StorageCapacity_MWh,EIC,projectID
0,0.0,Kernkraftwerk Emsland,Nuclear,Steam Turbine,PP,Germany,1336.000,0.33,1988.0,1988.0,2023.0,52.472897,7.324140,,0.0,0.0,0.0,{nan},"{'MASTR': {'MASTR-SEE944567587799'}, 'ENTSOE':..."
1,1.0,Brokdorf,Nuclear,Steam Turbine,PP,Germany,1410.000,0.33,1986.0,1986.0,2021.0,53.850830,9.344720,,0.0,0.0,0.0,{nan},"{'MASTR': {'MASTR-SEE951462745445'}, 'ENTSOE':..."
2,2.0,Borssele,Hard Coal,Steam Turbine,PP,Netherlands,485.000,,1973.0,,2034.0,51.433200,3.716000,,0.0,0.0,0.0,{'49W000000000054X'},"{'BEYONDCOAL': {'BEYOND-NL-2'}, 'ENTSOE': {'49..."
3,3.0,Gemeinschaftskernkraftwerk Neckarwestheim,Nuclear,Steam Turbine,PP,Germany,1310.000,0.33,1976.0,1989.0,2023.0,49.040019,9.176408,,0.0,0.0,0.0,{nan},"{'MASTR': {'MASTR-SEE985577062814'}, 'ENTSOE':..."
4,4.0,Isar,Nuclear,Steam Turbine,PP,Germany,1410.000,0.33,1979.0,1988.0,2023.0,48.605600,12.293150,,0.0,0.0,0.0,{nan},"{'MASTR': {'MASTR-SEE943690268513'}, 'ENTSOE':..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29560,29858.0,Zf Frd Prufstand,Oil,,PP,Germany,6.166,,2000.0,,,47.666864,9.486685,,0.0,0.0,0.0,"{nan, nan, nan, nan}","{'MASTR': {'MASTR-SEE937736737958', 'MASTR-SEE..."
29561,29859.0,Zf Pas Bhkw,Natural Gas,,CHP,Germany,1.182,,2017.0,,,48.603866,13.422240,,0.0,0.0,0.0,{nan},{'MASTR': {'MASTR-SEE954959703560'}}
29562,29860.0,Zf Sbr Prufstand,Oil,,PP,Germany,2.400,,2007.0,,,49.217758,6.973464,,0.0,0.0,0.0,"{nan, nan}","{'MASTR': {'MASTR-SEE948501471533', 'MASTR-SEE..."
29563,29861.0,Zi,Other,,CHP,Germany,47.590,,1980.0,,,52.299865,11.672539,,0.0,0.0,0.0,"{nan, nan}","{'MASTR': {'MASTR-SEE950355589975', 'MASTR-SEE..."


In [None]:
# Scratch check of how a 1e-6 value is rendered: print() shows it in scientific
# notation; np.format_float_positional gives the positional form, which is then
# split on the decimal point and the number of parts counted.
# NOTE(review): assigns nothing, so no other cell depends on it — candidate for removal.
print(np.float64(1/10**6))
len(str(np.format_float_positional(1e-6)).split("."))

1e-06


1

## Serialize for remote processing
The serialized artifact is compact (gzip + pickle) and ready to `scp` to the server.


In [3]:
# Write the prepared inputs to data/processed and report the path returned by save().
written = RawData.save(
    output_path=repo_root / "data" / "processed" / "osm_source_data.pkl.gz",
)
print(f"RawData.save wrote: {written}")

RawData.save wrote: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model/data/processed/osm_source_data.pkl.gz


In [4]:
# Reload the serialized artifact into a fresh, empty RawData container.
RawData = ps.data_prep.RawData(None)
# Build the path explicitly (same file the save step targets) so this cell can
# run on its own, without relying on a variable left over from the save cell.
load_path = repo_root / "data" / "processed" / "osm_source_data.pkl.gz"
RawData.load(input_path=load_path)
print(f"Loaded RawData from: {load_path}")

Loaded RawData from: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model/data/processed/osm_source_data.pkl.gz


## Create a network
Use the loaded `RawData` object to build the first PyPSA network.

In [5]:
# Hourly time index from the first to the last hour of 2024.
snapshots = pd.date_range(start="2024-01-01", end="2024-12-31 23:00", freq="h")

# Options handed to the network builder; None leaves a filter unset.
NETWORK_METADATA = dict(
    name='Simplified European Electricity Network',
    snapshots=snapshots,
    countries=None,
    generation_carriers=None,
    transmission_carriers=None,
)

In [None]:
# Create an empty PyPSA network, then pass it with RawData and the
# NETWORK_METADATA options to the project's build_network helper; the value it
# returns replaces `n`.
# NOTE(review): the recorded run was interrupted while adding buses — presumably
# this step is slow on the full data set; confirm before relying on Run All.
n = pypsa.Network()
n = ps.build_network(n, RawData, options=NETWORK_METADATA)



Adding 6737 buses...


KeyboardInterrupt: 

Unnamed: 0_level_0,Unnamed: 1_level_0,Optimal Capacity,Installed Capacity,Supply,Withdrawal,Energy Balance,Transmission,Capacity Factor,Curtailment,Capital Expenditure,Operational Expenditure,Revenue,Market Value
component,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1


### Next
- Use `notebooks/main.ipynb` to transfer the artifact and trigger the remote optimization.
- For custom country lists or tolerance, pass `countries`/`tol` to `prepare_osm_source`.
