# 01 Data Preparation

Load and clean the OSM-derived grid data, then package it for remote runs.


In [2]:
from pathlib import Path
import sys
import os

def find_repo_root(max_up=6, start=None, markers=('README.md', '.git')):
    """Locate the repository root by walking up the directory tree.

    Starting at ``start`` (the current working directory when omitted), each
    directory is checked for any of the ``markers``. The first directory that
    contains one is returned.

    Args:
        max_up: Maximum number of directories to inspect, counting the
            starting directory itself.
        start: Directory to begin the search from. Defaults to the current
            working directory.
        markers: File or directory names whose presence identifies the root.

    Returns:
        The resolved ``Path`` of the first directory containing a marker, or
        the resolved starting directory when no marker is found within
        ``max_up`` levels or before the filesystem root is reached.
    """
    origin = Path.cwd().resolve() if start is None else Path(start).resolve()
    candidate = origin
    for _ in range(max_up):
        if any((candidate / marker).exists() for marker in markers):
            return candidate
        # The filesystem root is its own parent; nothing further to climb.
        if candidate.parent == candidate:
            break
        candidate = candidate.parent
    # No marker found: fall back to where the search started.
    return origin

# Resolve the repository root and make its src/ directory importable.
repo_root = find_repo_root()
src_path = repo_root.joinpath('src/')
src_entry = str(src_path)
if src_entry not in sys.path:
    # Insert at position 1 so the existing first sys.path entry stays first.
    sys.path.insert(1, src_entry)
print(f"Using src path: {src_path}")
print(f"Repository root: {repo_root}")

# Must come after the sys.path adjustment above.
import pypsa_simplified as ps

Using src path: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model/src
Repository root: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model


## Data sources and parsing
- **OSM prebuilt electricity network** (`data/raw/OSM Prebuilt Electricity Network/`): buses, lines, links, converters, transformers.
- **Custom CSV parsing**: `prepare_osm_source` uses a geometry-safe loader (handles commas inside WKT) to keep column counts correct.
- **Endpoint extraction**: First/last coordinates are pulled from WKT to map line/link endpoints to buses (tolerance 1e-5 degrees).
- **Country filter**: Defaults to DE/FR/PL/AT/IT; adjust via `countries` if needed.


In [None]:
import pandas as pd
# Import through the same top-level package name as `ps` (src/ is on sys.path),
# so the package is not loaded a second time under the name `src.pypsa_simplified`.
from pypsa_simplified import prepare_osm_source

# Directory holding the prebuilt OSM network tables.
osm_dir = repo_root / "data" / "raw" / "OSM Prebuilt Electricity Network"
sources = prepare_osm_source(osm_dir)
# Summarise each entry: its shape when it has one, otherwise the raw value (e.g. None).
print({k: v.shape if hasattr(v, 'shape') else v for k, v in sources.items()})

# Later cells reference this instance under the name `OSMData`.
OSMData = ps.data_prep.OSMData(sources)

{'buses': (6737, 10), 'lines': (8994, 16), 'converters': (67, 7), 'links': (38, 10), 'transformers': (875, 8), 'generators': None, 'loads': None, 'storage': None}


## Serialize for remote processing
The serialized artifact is compact (gzip + pickle) and ready to `scp` to the server.


In [3]:
# Write the OSM source inputs to the processed-data folder.
processed_dir = repo_root / "data" / "processed"
written = OSMData.save(output_path=processed_dir / "osm_source_data.pkl.gz")
print(f"OSMData.save wrote: {written}")

OSMData.save wrote: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model/data/processed/osm_source_data.pkl.gz


In [4]:
# Reload the saved artifact into a fresh OSMData instance built without sources.
# The path is spelled out here so this cell does not rely on `written` from the save cell.
# The artifact is a pickle: only load files you produced yourself.
OSMData = ps.data_prep.OSMData(None)
load_path = repo_root / "data" / "processed" / "osm_source_data.pkl.gz"
OSMData.load(input_path=load_path)
print(f"Loaded OSMData from: {load_path}")

Loaded OSMData from: /Users/jedrek/Documents/Studium Volkswirschaftslehre/3. Semester/European Energy Policy/HA/PyPSA---Simplified-European-Model/PyPSA---Simplified-European-Model/data/processed/osm_source_data.pkl.gz


## Create a network
Use the `OSMData` object to create the first network.

In [1]:
# Construct a PyPSA Network with no arguments; nothing from OSMData is added in this cell.
# NOTE(review): the recorded output of this cell is an ImportError (ComponentType from
# pypsa.definitions.components) — looks like a broken or mismatched pypsa install;
# confirm the environment before relying on this cell.
import pypsa
n = pypsa.Network()

ImportError: cannot import name 'ComponentType' from 'pypsa.definitions.components' (/opt/homebrew/anaconda3/envs/base_pip/lib/python3.12/site-packages/pypsa/definitions/components.py)

### Next
- Use `notebooks/main.ipynb` to transfer the artifact and trigger the remote optimization.
- For custom country lists or tolerance, pass `countries`/`tol` to `prepare_osm_source`.
