In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import xarray as xr

In [2]:
# Small constant added to denominators so ratios stay finite when a total is zero.
EPS = 1e-9

# Sliding-window geometry. build_windows applies both values as timestep counts
# (positional slices), so they equal hours only for hourly-resolution data.
WINDOW_HOURS = 7 * 24  # window length: 7 days
STEP_HOURS = 3 * 24    # stride between window starts: 3 days (tunable)

In [3]:
def _idx_to_str(index_like):
    if hasattr(index_like, "to_pandas"):
        index_like = index_like.to_pandas()
    if isinstance(index_like, pd.MultiIndex):
        return index_like.map(lambda tpl: '::'.join(map(str, tpl))).astype(str)
    return pd.Index(index_like).astype(str)

In [4]:
def _sum_sel(da, dim_name, mask_func):
    labels = _idx_to_str(da[dim_name])
    sel_labels = labels[mask_func(labels)].tolist()
    if dim_name in da.dims and sel_labels:
        return float(da.sel({dim_name: sel_labels}).sum())
    return 0.0

In [5]:
def _series_time(da, dim_name, mask_func):
    """Collapse ``dim_name`` by summing only the entries picked by ``mask_func``.

    Parameters
    ----------
    da : xarray.DataArray
        Array to aggregate.
    dim_name : str
        Dimension to filter and then sum over.
    mask_func : callable
        Receives the string labels of ``dim_name`` (as built by ``_idx_to_str``)
        and returns a boolean mask of the same length.

    Returns
    -------
    xarray.DataArray
        ``da`` summed over the selected labels, keeping every other dimension.
        When nothing matches, an all-zero array over those remaining dimensions.
        When ``dim_name`` is not a dimension of ``da``, zeros shaped like ``da``.
    """
    # Test the dimension first: da[dim_name] raises on a missing dimension, which
    # would make the zero fallback unreachable.
    if dim_name not in da.dims:
        return xr.zeros_like(da)

    labels = _idx_to_str(da[dim_name])
    positions = np.flatnonzero(np.asarray(mask_func(labels), dtype=bool))
    if positions.size:
        # Select by position, not by label: the stringified labels are only valid
        # lookup keys when the coordinate itself stores exactly those strings.
        return da.isel({dim_name: positions}).sum(dim=dim_name)

    # No match: return zeros over the remaining dimensions. Reducing the
    # dimension (instead of taking element 0) also works when it has length 0.
    return xr.zeros_like(da.sum(dim=dim_name))


In [6]:
def _energy_cap(ds, pat):
    s = ds.energy_cap.to_series()
    if s.empty: return 0.0
    idx = _idx_to_str(s.index).str.contains(pat)
    return float(s[idx].sum())

In [7]:
def _storage_cap(ds, pat):
    s = ds.storage_cap.to_series() if 'storage_cap' in ds else pd.Series(dtype=float)
    if s.empty: return 0.0
    idx = _idx_to_str(s.index).str.contains(pat)
    return float(s[idx].sum())

In [10]:
def _cost_sum(ds, pat):
    s = ds.cost.to_series() if 'cost' in ds else pd.Series(dtype=float)
    if s.empty: return 0.0
    idx = _idx_to_str(s.index).str.contains(pat)
    return float(s[idx].sum())

In [11]:
def _window_slices(nT, win, step):
    starts = list(range(0, max(1, nT - win + 1), step))
    if not starts or (starts[-1] + win < nT):
        starts.append(max(0, nT - win))
    return [(s, s + win) for s in starts]

In [12]:
def build_windows(nc_path: str, out_parquet: str, window_hours=None, step_hours=None):
    """Cut a results dataset into sliding windows and save per-window metrics.

    Loads the NetCDF file at ``nc_path``, derives a handful of time series
    (H2 served, H2 produced, electricity and water drawn by the electrolyzer,
    aggregated storage level), summarises each of them over sliding windows of
    timesteps and writes one row per window to ``out_parquet``.

    Parameters
    ----------
    nc_path : str
        Path of the NetCDF results file. It must provide ``carrier_con``,
        ``carrier_prod``, ``energy_cap`` and a ``timesteps`` dimension;
        ``storage``, ``storage_cap`` and ``cost`` are optional.
    out_parquet : str
        Destination Parquet file. Missing parent directories are created.
    window_hours : int, optional
        Window length in timesteps; defaults to the module-level ``WINDOW_HOURS``.
    step_hours : int, optional
        Stride between window starts in timesteps; defaults to ``STEP_HOURS``.

    Raises
    ------
    ValueError
        If the dataset has no timesteps.

    Notes
    -----
    Windows are positional slices of ``timesteps``, so the ``*_hours`` values
    correspond to hours only for hourly data. Units implied by the column names
    (MWh, MW) are taken on trust from the input model - TODO confirm.
    """
    win = WINDOW_HOURS if window_hours is None else int(window_hours)
    step = STEP_HOURS if step_hours is None else int(step_hours)

    ds = xr.load_dataset(nc_path)

    # --- Key time series ---
    da_con = ds.carrier_con
    da_prod = ds.carrier_prod

    # H2 demand served: 'demand_h2' entries of carrier_con for the hydrogen
    # carrier. Assumed to be stored as negative values (consumption convention),
    # hence the sign flip - TODO confirm against the model.
    h2_dem_con_ts = _series_time(
        da_con, 'loc_tech_carriers_con',
        lambda lab: lab.str.endswith('::hydrogen') & lab.str.contains('demand_h2')
    )
    h2_served_ts = -1.0 * h2_dem_con_ts

    # H2 produced by the electrolyzer
    h2_prod_ts = _series_time(
        da_prod, 'loc_tech_carriers_prod',
        lambda lab: lab.str.endswith('::hydrogen') & lab.str.contains('electrolyzer')
    )

    # Electricity drawn by the electrolyzer (magnitude only)
    elec_to_e_ts = abs(_series_time(
        da_con, 'loc_tech_carriers_con',
        lambda lab: lab.str.endswith('::electricity') & lab.str.contains('electrolyzer')
    ))

    # Water drawn by the electrolyzer (magnitude only). _series_time already
    # returns zeros when no label matches, so no separate existence check is needed.
    water_to_e_ts = abs(_series_time(
        da_con, 'loc_tech_carriers_con',
        lambda lab: lab.str.endswith('::water') & lab.str.contains('electrolyzer')
    ))

    # Aggregated storage level: 'storage' summed over every dimension except time
    if 'storage' in ds:
        non_time_dims = [d for d in ds.storage.dims if d != 'timesteps']
        storage_ts = ds.storage.sum(dim=non_time_dims)
    else:
        storage_ts = xr.zeros_like(h2_served_ts)

    # --- Capacities (static, repeated on every row) ---
    cap_pv   = _energy_cap(ds, r'::pv$')
    cap_el   = _energy_cap(ds, r'electrolyzer')
    cap_line = _energy_cap(ds, r'ac_line')
    cap_h2st = _storage_cap(ds, r'h2_store')

    # --- Aggregated cost over the whole horizon ---
    total_cost = float(ds.cost.sum()) if 'cost' in ds else np.nan

    # --- Window construction ---
    nT = int(ds['timesteps'].size)
    if nT == 0:
        raise ValueError(f"{nc_path} has no timesteps; cannot build windows")

    rows = []
    for (a, b) in _window_slices(nT, win, step):
        # isel silently truncates a slice that runs past the data; clamp the end
        # so the window length and t_end_idx describe what was really aggregated.
        b = min(b, nT)
        n_steps = b - a
        if n_steps <= 0:
            continue
        sl = slice(a, b)

        _h2_serv  = float(h2_served_ts.isel(timesteps=sl).sum())
        _h2_prod  = float(h2_prod_ts.isel(timesteps=sl).sum())
        _elec_e   = float(elec_to_e_ts.isel(timesteps=sl).sum())
        _water_e  = float(water_to_e_ts.isel(timesteps=sl).sum())

        soc_win   = storage_ts.isel(timesteps=sl)  # sliced once, reused below
        _soc_min  = float(soc_win.min())
        _soc_p50  = float(soc_win.quantile(0.5))
        _soc_p95  = float(soc_win.quantile(0.95))

        # Placeholder: without an explicit target-demand series the proxy equals
        # the served amount, so the unmet value is always 0. Swap demand_proxy
        # for the real target series when one is available.
        demand_proxy = _h2_serv
        unmet = max(0.0, demand_proxy - _h2_serv)

        # Ratios (EPS keeps them finite when the denominator is zero)
        sp_h2    = _h2_prod / (_elec_e + EPS)   # H2 produced per unit of electricity
        water_h2 = _water_e / (_h2_prod + EPS)  # water per unit of H2 produced

        # Approximate electrolyzer capacity factor over the window
        cf_el = (_h2_prod / (cap_el * n_steps + EPS)) if cap_el > 0 else np.nan

        rows.append({
            # --- resilience targets/proxies (measurable per window) ---
            "h2_served_MWh": _h2_serv,
            "h2_unmet_proxy": unmet,
            "soc_min": _soc_min,
            "soc_p50": _soc_p50,
            "soc_p95": _soc_p95,

            # --- performance/usage ---
            "h2_produced_MWh": _h2_prod,
            "elec_to_e_MWh": _elec_e,
            "water_to_e_units": _water_e,
            "sp_h2_MWh_per_MWh_elec": sp_h2,
            "water_per_MWh_h2": water_h2,
            "cf_electrolyzer_approx": cf_el,

            # --- static capacities (repeated on every window) ---
            "cap_pv_MW": cap_pv,
            "cap_el_MW": cap_el,
            "cap_line_MW": cap_line,
            "cap_h2_store_MWh": cap_h2st,

            # --- aggregated cost (whole horizon) ---
            "total_cost": total_cost,

            # --- metadata ---
            "t_start_idx": a,
            "t_end_idx": b
        })

    df = pd.DataFrame(rows)

    # -------- SIMPLE NORMALISATION --------
    # Every listed column is divided by a single scale: the larger of the 95th
    # percentiles of served and produced H2 (1.0 if that is not a positive,
    # finite number). Adjust to your needs.
    scale_by = df[["h2_served_MWh", "h2_produced_MWh"]].quantile(0.95).max()
    scale_by = float(scale_by) if np.isfinite(scale_by) and scale_by > 0 else 1.0

    scaled_cols = ["h2_served_MWh", "h2_produced_MWh", "elec_to_e_MWh", "cap_pv_MW",
                   "cap_el_MW", "cap_line_MW", "cap_h2_store_MWh"]
    df[scaled_cols] = df[scaled_cols] / (scale_by + EPS)

    # Save; create the destination folder first so a fresh checkout does not
    # fail on a missing directory.
    Path(out_parquet).parent.mkdir(parents=True, exist_ok=True)
    df.to_parquet(out_parquet, index=False)
    print(f"[OK] Ventanas guardadas en {out_parquet} | shape={df.shape}")


In [14]:
# Input: results file to read. Output: Parquet file that receives the window table.
nc_path = "results_baseline_2028_tx.nc"
out_path = "resilience/windows.parquet"

# Build the per-window metrics and write them to out_path; the function prints
# a confirmation line with the shape of the saved table.
build_windows(nc_path, out_path)

[OK] Ventanas guardadas en resilience/windows.parquet | shape=(243, 18)
