# Daten laden

Dieses Notebook liest die Rohdaten ein und speichert gefilterte Kopien für den Kanton Zürich.


In [None]:

from pathlib import Path
import os
import json
import pandas as pd
import geopandas as gpd

# Project paths: everything is resolved relative to the parent of the
# current working directory.
project_root = Path('..').resolve()
data_dir = project_root / 'datasets'
output_dir = project_root / 'outputs'

# All generated artefacts live in sub-folders of the output directory.
processed_dir, maps_dir, figures_dir = (
    output_dir / subfolder for subfolder in ('processed', 'maps', 'figures')
)

# Create the output tree up front; existing folders are left untouched.
for folder in (output_dir, processed_dir, maps_dir, figures_dir):
    folder.mkdir(parents=True, exist_ok=True)

print(f"Output directories available: {output_dir}")


In [None]:

# File paths of the raw inputs
energy_path = data_dir / 'energyreporter_municipality_historized.csv'
wealth_path = data_dir / 'data_7354970.csv'
geojson_path = data_dir / 'zh-municipalities.geojson'

energy = pd.read_csv(energy_path, parse_dates=['energyreporter_date'])
# The wealth file is read with ';' as the column separator.
wealth = pd.read_csv(wealth_path, sep=';')

# The GeoJSON is optional: if it is missing or cannot be read, `geo` stays
# None so that later steps can test `geo is not None`.
geo = None
if geojson_path.exists():
    try:
        # The raw JSON is parsed in addition to the GeoDataFrame because the
        # municipality number is taken from each feature's top-level `id`.
        # GeoJSON is UTF-8 encoded, so the encoding is stated explicitly
        # instead of relying on the platform default.
        with open(geojson_path, 'r', encoding='utf-8') as f:
            geojson_data = json.load(f)
        geo = gpd.read_file(geojson_path)
        # NOTE(review): a feature without an `id` falls back to its position
        # in the file, which is not a real municipality number -- confirm
        # that this fallback is intended.
        bfs_nrs = [feature.get('id', idx) for idx, feature in enumerate(geojson_data.get('features', []))]
        # pd.to_numeric returns a NumPy array for list input, and NumPy does
        # not know pandas' nullable 'Int64' dtype. Converting via a Series
        # (aligned to geo's index) keeps the cast inside pandas, so values
        # that cannot be parsed become <NA> instead of raising.
        bfs_series = pd.Series(bfs_nrs, index=geo.index)
        geo['bfs_nr'] = pd.to_numeric(bfs_series, errors='coerce').astype('Int64')
        print(f"GeoJSON loaded: {len(geo)} features")
    except Exception as exc:
        # Best effort: a broken GeoJSON must not stop the tabular pipeline.
        print(f"Warning: GeoJSON could not be loaded ({exc})")
        geo = None
else:
    print('Info: GeoJSON not found; maps will be skipped until provided.')

print('Shapes:', energy.shape, wealth.shape)


In [None]:

# Restrict the energy data to the canton of Zurich and harmonise the keys.
energy_columns = [
    'bfs_nr',
    'municipality',
    'energyreporter_date',
    'elec_consumption_mwh_per_year_per_capita',
    'elec_consumption_households_mwh_per_year_per_capita',
]
is_zurich = energy['canton'] == 'ZH'
energy_zh = energy.loc[is_zurich, energy_columns].copy()
energy_zh['bfs_nr'] = energy_zh['bfs_nr'].astype(int)

# Keep only the wealth columns listed here; the municipality number is cast
# to int in the same way as for the energy table.
wealth_columns = [
    'BFS_NR',
    'GEBIET_NAME',
    'INDIKATOR_ID',
    'INDIKATOR_NAME',
    'INDIKATOR_JAHR',
    'INDIKATOR_VALUE',
]
wealth_small = wealth.loc[:, wealth_columns].copy()
wealth_small['BFS_NR'] = wealth_small['BFS_NR'].astype(int)

# Export the prepared tables; the geometry is written only if it was loaded.
for frame, filename in ((energy_zh, 'energy_zh.csv'), (wealth_small, 'wealth_small.csv')):
    frame.to_csv(processed_dir / filename, index=False)
if geo is not None:
    geo.to_file(processed_dir / 'zh_municipalities.geojson', driver='GeoJSON')

print('Saved energy_zh.csv and wealth_small.csv to outputs/processed')
