<a href="https://colab.research.google.com/github/esb-index/Barka-AV/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!mkdir -p /content/era5_processed_light && mv /content/*_daily_light.csv /content/era5_processed_light/


In [None]:
# 6-hazard generator + E, V, r számítás (Colab cella)
import os, glob
import pandas as pd
import numpy as np

# ---------- BEÁLLÍTÁSOK ----------
INPUT_DIR = "/content"
OUTPUT_DIR = "/content/cpri_outputs"
ASSETS_PATH = "/content/cpri_outputs/assets.xlsx"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# region súlyok (ahogy megadtad)
region_weights = {
    "dania": 0.8, "denmark": 0.8,
    "nemet": 0.6, "germany": 0.6,
    "uk": 0.9, "netherlands": 0.9,
    "usa": 1.0, "united states": 1.0,
    "tajvan": 1.0, "taiwan": 1.0
}

# fizikai (károkozó) küszöbök (ha van ilyen fizikailag értelmezett)
ABS_THRESH = {
    "flood": {"tp_mm_day": 50.0},               # napi csapadék > 50 mm -> potenciálisan károkozó
    "windstorm": {"wind_ms": 27.78},           # ~100 km/h -> káros vihar
    "heatwave": {"temp_C": 35.0},              # napi közép > 35°C
    "coldwave": {"temp_C": -10.0},             # napi közép < -10°C (példa)
    "snowstorm": {"sd_m": 0.2},                # hó > 20 cm
    "solaranomaly": {"ssrd_high": None, "ssrd_low": None}  # általában percentilis-alapú, fizikai küszöb kevésbé egyértelmű
}

# V0 lookup (ahogy megadtad)
type_map = {
    ("wind", "offshore"): 0.6, ("wind", "onshore"): 0.4,
    ("solar", "pv"): 0.3, ("hydro", "-"): 0.5,
    ("biomass", "-"): 0.4, ("gas", "-"): 0.7,
    ("coal", "-"): 0.7, ("nuclear", "-"): 0.8,
    ("hydrogen", "-"): 0.9, ("storage", "battery"): 0.7,
    ("grid", "transmission"): 0.6,
}

def get_V0(row):
    key = (str(row.get("type","")).strip().lower(), str(row.get("subtype","")).strip().lower())
    return type_map.get(key, 0.5)

# ---------- HELPER FUNKCIÓK ----------
def detect_kelvin_and_to_c(series):
    """Ha átlag >200 -> Kelvin, konvertáljuk C-re"""
    if series.dropna().empty:
        return series
    if series.mean() > 200:
        return series - 273.15
    return series

def winsorize_series(s, lower_q=0.01, upper_q=0.99):
    vals = s.dropna()
    if vals.empty:
        return s
    lo = vals.quantile(lower_q)
    hi = vals.quantile(upper_q)
    return s.clip(lower=lo, upper=hi)

def minmax_norm(s):
    if s.dropna().empty:
        return s
    mn = s.min()
    mx = s.max()
    if np.isclose(mx, mn):
        return s*0.0
    return (s - mn) / (mx - mn)

# ---------- 1) megtaláljuk a daily_merged fájlokat ----------
daily_files = glob.glob(os.path.join(INPUT_DIR, "*_daily_merged.csv"))
print("Talált daily_merged fájlok:", [os.path.basename(f) for f in daily_files])
if not daily_files:
    raise SystemExit("Nincs *_daily_merged.csv fájl a /content mappában. Töltsd fel őket!")

# ---------- 2) feldolgozzuk minden fájlt region/év bontásban ----------
# aggregáljuk a 6 hazard-ot region-year szinten
hazard_rows = {
    "flood": [], "windstorm": [], "heatwave": [], "coldwave": [], "snowstorm": [], "solaranomaly": []
}

for f in daily_files:
    fname = os.path.basename(f)
    region = fname.split("_daily_merged.csv")[0].lower()
    print("Feldolgozás:", region, "->", fname)
    df = pd.read_csv(f)
    # kis-nagybetűs oszlopok
    df.columns = [c.strip() for c in df.columns]
    cols_low = {c.lower(): c for c in df.columns}
    # kötelező: date
    if 'date' not in cols_low:
        print("  !!! Nincs 'date' oszlop a", fname, "- fájlban. Kihagyom.")
        continue
    df['date'] = pd.to_datetime(df[cols_low['date']], errors='coerce')
    df = df.dropna(subset=['date']).copy()
    df['year'] = df['date'].dt.year

    # standardizált oszlopváltozók lekérése (ha vannak)
    def col(c):
        return cols_low.get(c.lower(), None)

    # előkészítés: szélsebesség, t2m_C, stb.
    # Wind speed: prefer u10_max_mean & v10_max_mean
    if col('u10_max_mean') and col('v10_max_mean'):
        u = df[col('u10_max_mean')].fillna(0.0)
        v = df[col('v10_max_mean')].fillna(0.0)
        df['wind_speed'] = np.sqrt(u*u + v*v)
    # temperature to C
    if col('t2m_mean_mean'):
        tcol = col('t2m_mean_mean')
        df['t2m_C'] = detect_kelvin_and_to_c(df[tcol].astype(float))
    # snow depth in meters?
    if col('sd_mean_mean'):
        df['sd_m'] = df[col('sd_mean_mean')].astype(float)
    # solar ssrd
    if col('ssrd_mean_mean'):
        df['ssrd'] = df[col('ssrd_mean_mean')].astype(float)
    # precipitation
    if col('tp_sum_mean'):
        df['tp_mm'] = df[col('tp_sum_mean')].astype(float)
    # sea surface temp (sst) if needed
    if col('sst_mean_mean'):
        df['sst'] = df[col('sst_mean_mean')].astype(float)

    years = sorted(df['year'].unique())
    for yr in years:
        sub = df[df['year']==yr]
        days = len(sub)
        if days == 0:
            continue

        # --- FLOOD ---
        if 'tp_mm' in sub.columns:
            H_raw = sub['tp_mm'].max() if sub['tp_mm'].dropna().size>0 else np.nan
            # P_raw - elsődlegesen fizikai küszöb (ABS_THRESH), ha nincs, használunk 95%-os küszöb
            phys = ABS_THRESH['flood'].get('tp_mm_day', None)
            if phys is not None:
                P_raw = (sub['tp_mm'] > phys).sum() / days
            else:
                thr95 = df['tp_mm'].dropna().quantile(0.95)
                P_raw = (sub['tp_mm'] > thr95).sum() / days
        else:
            H_raw = np.nan; P_raw = np.nan
        hazard_rows['flood'].append({'region':region,'year':int(yr),'H_raw':float(H_raw) if not pd.isna(H_raw) else np.nan,'P_raw':float(P_raw) if not pd.isna(P_raw) else np.nan})

        # --- WINDSTORM ---
        if 'wind_speed' in sub.columns:
            H_raw = sub['wind_speed'].max() if sub['wind_speed'].dropna().size>0 else np.nan
            phys = ABS_THRESH['windstorm'].get('wind_ms', None)
            if phys is not None:
                P_raw = (sub['wind_speed'] > phys).sum() / days
            else:
                thr95 = df['wind_speed'].dropna().quantile(0.95)
                P_raw = (sub['wind_speed'] > thr95).sum() / days
        else:
            H_raw = np.nan; P_raw = np.nan
        hazard_rows['windstorm'].append({'region':region,'year':int(yr),'H_raw':float(H_raw) if not pd.isna(H_raw) else np.nan,'P_raw':float(P_raw) if not pd.isna(P_raw) else np.nan})

        # --- HEATWAVE ---
        if 't2m_C' in sub.columns:
            H_raw = sub['t2m_C'].max() if sub['t2m_C'].dropna().size>0 else np.nan
            phys = ABS_THRESH['heatwave'].get('temp_C', None)
            if phys is not None:
                P_raw = (sub['t2m_C'] > phys).sum() / days
            else:
                thr95 = df['t2m_C'].dropna().quantile(0.95)
                P_raw = (sub['t2m_C'] > thr95).sum() / days
        else:
            H_raw = np.nan; P_raw = np.nan
        hazard_rows['heatwave'].append({'region':region,'year':int(yr),'H_raw':float(H_raw) if not pd.isna(H_raw) else np.nan,'P_raw':float(P_raw) if not pd.isna(P_raw) else np.nan})

        # --- COLDWAVE (extrém hideg) ---
        if 't2m_C' in sub.columns:
            H_raw = sub['t2m_C'].min() if sub['t2m_C'].dropna().size>0 else np.nan
            phys = ABS_THRESH['coldwave'].get('temp_C', None)
            if phys is not None:
                P_raw = (sub['t2m_C'] < phys).sum() / days
            else:
                thr05 = df['t2m_C'].dropna().quantile(0.05)
                P_raw = (sub['t2m_C'] < thr05).sum() / days
        else:
            H_raw = np.nan; P_raw = np.nan
        # For H_raw for coldwave we store the absolute cold severity (negative allowed)
        hazard_rows['coldwave'].append({'region':region,'year':int(yr),'H_raw':float(H_raw) if not pd.isna(H_raw) else np.nan,'P_raw':float(P_raw) if not pd.isna(P_raw) else np.nan})

        # --- SNOWSTORM ---
        if 'sd_m' in sub.columns:
            H_raw = sub['sd_m'].max() if sub['sd_m'].dropna().size>0 else np.nan
            phys = ABS_THRESH['snowstorm'].get('sd_m', None)
            if phys is not None:
                P_raw = (sub['sd_m'] > phys).sum() / days
            else:
                thr95 = df['sd_m'].dropna().quantile(0.95)
                P_raw = (sub['sd_m'] > thr95).sum() / days
        else:
            H_raw = np.nan; P_raw = np.nan
        hazard_rows['snowstorm'].append({'region':region,'year':int(yr),'H_raw':float(H_raw) if not pd.isna(H_raw) else np.nan,'P_raw':float(P_raw) if not pd.isna(P_raw) else np.nan})

        # --- SOLAR ANOMALY (mindkét irány) ---
        if 'ssrd' in sub.columns:
            # H_raw: éves abs max eltérés a középértéktől (szigetelt jelzés)
            mean_ssrd = df['ssrd'].dropna().mean() if df['ssrd'].dropna().size>0 else 0.0
            H_raw = (sub['ssrd'] - mean_ssrd).abs().max() if sub['ssrd'].dropna().size>0 else np.nan
            # P_raw: ha van fizikai küszöb nemzetközi szinten, használjuk. Egyébként percentilis kétoldalúan.
            thr_high = df['ssrd'].dropna().quantile(0.95) if df['ssrd'].dropna().size>0 else np.nan
            thr_low = df['ssrd'].dropna().quantile(0.05) if df['ssrd'].dropna().size>0 else np.nan
            P_raw = ((sub['ssrd'] > thr_high).sum() + (sub['ssrd'] < thr_low).sum()) / days
        else:
            H_raw = np.nan; P_raw = np.nan
        hazard_rows['solaranomaly'].append({'region':region,'year':int(yr),'H_raw':float(H_raw) if not pd.isna(H_raw) else np.nan,'P_raw':float(P_raw) if not pd.isna(P_raw) else np.nan})

    print(f"  -> kész: {region}, évek: {len(years)}")

# ---------- 3) winsorize és normalizálás regiononként, majd mentés ----------
def postprocess_hazard(rows, hazard_name):
    dfh = pd.DataFrame(rows)
    if dfh.empty:
        print("!! Üres:", hazard_name)
        return dfh
    out_list = []
    for region, g in dfh.groupby('region'):
        gr = g.copy().sort_values('year').reset_index(drop=True)
        # winsorize raw values (H_raw, P_raw) 1-99 percentile
        for col in ['H_raw','P_raw']:
            if col in gr.columns:
                gr[col + "_win"] = winsorize_series(gr[col])
        # normalize (min-max) on the winsorized values per region
        if 'H_raw_win' in gr.columns:
            gr['H_norm'] = minmax_norm(gr['H_raw_win'])
        if 'P_raw_win' in gr.columns:
            gr['P_norm'] = minmax_norm(gr['P_raw_win'])
        out_list.append(gr)
    df_out = pd.concat(out_list, ignore_index=True)
    # select and reorder
    cols_keep = [c for c in ['region','year','H_raw','P_raw','H_raw_win','P_raw_win','H_norm','P_norm'] if c in df_out.columns]
    df_out = df_out[cols_keep]
    outpath = os.path.join(OUTPUT_DIR, f"hazard_yearly_{hazard_name}.csv")
    df_out.to_csv(outpath, index=False)
    print(f"✅ Mentve: {outpath} (sor: {len(df_out)})")
    return df_out

hz_dfs = {}
for hz in hazard_rows.keys():
    hz_dfs[hz] = postprocess_hazard(hazard_rows[hz], hz)

# ---------- 4) E és V számítása, r kiszámítása ----------
# E: (H_norm + P_norm)/2 * region_weight  (region_weight a region string alapján)
# V: assets alapján
if not os.path.exists(ASSETS_PATH):
    raise SystemExit("assets.xlsx nincs a megadott helyen! Töltsd fel a /content/assets.xlsx fájlba.")

assets = pd.read_excel(ASSETS_PATH, dtype=str)
assets.columns = [c.strip() for c in assets.columns]
# biztosítsuk a kötelező oszlopokat és típusokat
if 'asset_id' not in assets.columns:
    raise SystemExit("assets.xlsx nem tartalmaz 'asset_id' oszlopot.")
for col in ['capacity_MW','commission_year','latitude','longitude']:
    if col in assets.columns:
        assets[col] = pd.to_numeric(assets[col], errors='coerce')

# normalizáljuk az országneveket egyszerű string->region mappingre (ha szükséges)
def normalize_country_to_region(c):
    if pd.isna(c): return np.nan
    s = str(c).strip().lower()
    mapping = {
        'denmark':'dania','denmark ':'dania','dk':'dania',
        'germany':'nemet','germany ':'nemet','de':'nemet',
        'united kingdom':'uk','uk':'uk','england':'uk','gb':'uk',
        'netherlands':'netherlands','nl':'netherlands',
        'taiwan':'tajvan','tw':'tajvan',
        'usa':'usa','united states':'usa','us':'usa','america':'usa'
    }
    return mapping.get(s, s)

assets['region_norm'] = assets['country'].apply(normalize_country_to_region)

# V számítása (V0 + age + maintenance)
assets['V0'] = assets.apply(get_V0, axis=1)
assets['commission_year'] = pd.to_numeric(assets.get('commission_year', pd.Series([np.nan]*len(assets))), errors='coerce')
assets['age_factor'] = 2025 - assets['commission_year'].fillna(2025)
assets['maintenance'] = 0.7
# a formula, amit adtál: V=clip(V0 * (1+0.3*age_factor) * (1-0.4*maintenance),0,1)
assets['V'] = np.clip(assets['V0'] * (1 + 0.3 * (assets['age_factor']) ) * (1 - 0.4 * assets['maintenance']), 0, 1)

assets[['asset_id','region_norm','V0','age_factor','maintenance','V']].to_csv(os.path.join(OUTPUT_DIR,'vulnerability_matrix.csv'), index=False)
print("✅ vulnerability_matrix mentve")

# összeállítjuk az r sorokat: minden hazard df-t párosítunk az assets regionnel
r_rows = []
for hz, dfhz in hz_dfs.items():
    if dfhz is None or dfhz.empty:
        continue
    for _, a in assets.iterrows():
        region = a['region_norm']
        subset = dfhz[dfhz['region'] == region]
        if subset.empty:
            # nincs hazard adat erre a régióra -> kihagyjuk
            continue
        for _, s in subset.iterrows():
            Hn = s.get('H_norm', np.nan)
            Pn = s.get('P_norm', np.nan)
            E = ((s.get('H_norm', np.nan) + s.get('P_norm', np.nan))/2.0) * region_weights.get(region, 0.8) if (not pd.isna(s.get('H_norm')) and not pd.isna(s.get('P_norm'))) else np.nan
            V = a.get('V', np.nan)
            r_val = Hn * Pn * E * V if (not pd.isna(Hn) and not pd.isna(Pn) and not pd.isna(E) and not pd.isna(V)) else np.nan
            r_rows.append({
                'asset_id': a['asset_id'],
                'hazard': hz,
                'region': region,
                'year': int(s['year']),
                'H_norm': Hn,
                'P_norm': Pn,
                'E': E,
                'V': V,
                'r': r_val
            })

df_r = pd.DataFrame(r_rows)
out_r = os.path.join(OUTPUT_DIR, "r_values_enhanced.csv")
df_r.to_csv(out_r, index=False)
print(f"✅ Mentve r_values_enhanced: {out_r}  (sorok: {len(df_r)})")

# ---------- 5) rövid összegzés ----------
print("\n--- ÖSSZEGZÉS ---")
for hz in hz_dfs:
    dfhz = hz_dfs[hz]
    print(f"{hz}: fájl sorok: {len(dfhz) if dfhz is not None else 0}")
print("assets:", len(assets))
print("r sorok:", len(df_r))
print("Készen. Ellenőrizd a mappát:", OUTPUT_DIR)


Talált daily_merged fájlok: ['usa_daily_merged.csv', 'nemet_daily_merged.csv', 'tajvan_daily_merged.csv', 'dania_daily_merged.csv', 'uk_daily_merged.csv']
Feldolgozás: usa -> usa_daily_merged.csv
  -> kész: usa, évek: 24
Feldolgozás: nemet -> nemet_daily_merged.csv
  -> kész: nemet, évek: 24
Feldolgozás: tajvan -> tajvan_daily_merged.csv
  -> kész: tajvan, évek: 24
Feldolgozás: dania -> dania_daily_merged.csv
  -> kész: dania, évek: 24
Feldolgozás: uk -> uk_daily_merged.csv
  -> kész: uk, évek: 24
✅ Mentve: /content/cpri_outputs/hazard_yearly_flood.csv (sor: 120)
✅ Mentve: /content/cpri_outputs/hazard_yearly_windstorm.csv (sor: 120)
✅ Mentve: /content/cpri_outputs/hazard_yearly_heatwave.csv (sor: 120)
✅ Mentve: /content/cpri_outputs/hazard_yearly_coldwave.csv (sor: 120)
✅ Mentve: /content/cpri_outputs/hazard_yearly_snowstorm.csv (sor: 120)
✅ Mentve: /content/cpri_outputs/hazard_yearly_solaranomaly.csv (sor: 120)
✅ vulnerability_matrix mentve
✅ Mentve r_values_enhanced: /content/cpri_ou

In [None]:
# ============================================================
# 🌍 Ørsted – CPRI index: teljes pipeline
# Lépések:
# 1️⃣ MICE imputáció (daily_merged → daily_imputed)
# 2️⃣ Éves hazardok számítása
# 3️⃣ Exposure (E) és Vulnerability (V)
# 4️⃣ r_values_enhanced.csv mentése
# ============================================================

import pandas as pd
import numpy as np
import os
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# --- PARAMÉTEREK ---
input_dir = "/content/cpri_outputs"
os.makedirs(input_dir, exist_ok=True)

region_weights = {
    "dania": 0.8, "denmark": 0.8,
    "nemet": 0.6, "germany": 0.6,
    "uk": 0.9, "netherlands": 0.9,
    "usa": 1.0, "united states": 1.0,
    "tajvan": 1.0, "taiwan": 1.0
}

# Vulnerability alapértékek
type_map = {
    ("wind", "offshore"): 0.6, ("wind", "onshore"): 0.4,
    ("solar", "pv"): 0.3, ("hydro", "-"): 0.5,
    ("biomass", "-"): 0.4, ("gas", "-"): 0.7,
    ("coal", "-"): 0.7, ("nuclear", "-"): 0.8,
    ("hydrogen", "-"): 0.9, ("storage", "battery"): 0.7,
    ("grid", "transmission"): 0.6,
}

# --- 1️⃣ MICE IMPUTÁCIÓ ---
print("🔧 MICE imputáció indul...")
daily_files = [f for f in os.listdir(input_dir) if f.endswith("_daily_merged.csv")]
imputer = IterativeImputer(random_state=42, max_iter=10)

for f in daily_files:
    path = os.path.join(input_dir, f)
    df = pd.read_csv(path)
    num = df.select_dtypes(include=[np.number])
    if num.empty:
        continue
    num_imputed = imputer.fit_transform(num)
    df[num.columns] = num_imputed
    out_path = path.replace("_merged.csv", "_imputed.csv")
    df.to_csv(out_path, index=False)
    print(f"✅ Imputált: {out_path} ({len(df)} sor)")

# --- 2️⃣ HAZARD DERIVÁLÁS ---
print("\n🌪 Hazard deriválás folyamatban...")
hazards = ["flood", "windstorm", "heatwave", "coldwave", "snowstorm", "solar_anomaly"]
hazard_dfs = []

for f in os.listdir(input_dir):
    if not f.endswith("_daily_imputed.csv"):
        continue

    region = f.split("_")[0].lower()
    df = pd.read_csv(os.path.join(input_dir, f))
    df["year"] = pd.to_datetime(df["date"]).dt.year
    grouped = df.groupby("year").agg("mean").reset_index()

    # --- H_raw és P_raw számítás ---
    # P_raw = extrém esemény valószínűsége (95. percentil feletti napok aránya)
    def compute_P_raw(series):
        threshold = np.nanpercentile(series, 95)
        return np.mean(series > threshold)

    h_vars = {
        "flood": "tp_sum_mean",
        "windstorm": ["u10_max_mean", "v10_max_mean"],
        "heatwave": "t2m_mean_mean",
        "coldwave": "t2m_mean_mean",
        "snowstorm": "sd_mean_mean",
        "solar_anomaly": "ssrd_mean_mean"
    }

    for hz, cols in h_vars.items():
        df_h = grouped.copy()
        if isinstance(cols, list):
            df_h["H_raw"] = np.sqrt(df[cols[0]]**2 + df[cols[1]]**2)
        else:
            df_h["H_raw"] = df[cols]

        df_h["P_raw"] = compute_P_raw(df_h["H_raw"])

        # Normálás 0–1 közé
        df_h["H_norm"] = (df_h["H_raw"] - df_h["H_raw"].min()) / (df_h["H_raw"].max() - df_h["H_raw"].min() + 1e-9)
        df_h["P_norm"] = (df_h["P_raw"] - df_h["P_raw"].min()) / (df_h["P_raw"].max() - df_h["P_raw"].min() + 1e-9)

        df_h["hazard"] = hz
        df_h["region"] = region
        df_h["region_weight"] = region_weights.get(region, 1.0)
        df_h["E"] = (df_h["H_norm"] + df_h["P_norm"]**2) * df_h["region_weight"]

        hazard_dfs.append(df_h[["hazard", "region", "year", "H_norm", "P_norm", "E"]])

hazard_all = pd.concat(hazard_dfs, ignore_index=True)
hazard_all.to_csv(f"{input_dir}/exposure_matrix.csv", index=False)
print(f"✅ Exposure mentve ({len(hazard_all)} sor)")

# --- 3️⃣ VULNERABILITY ---
print("\n🧩 Vulnerability számítás...")
assets = pd.read_excel(f"{input_dir}/assets.xlsx")

def get_V0(row):
    key = (row["type"].lower(), str(row["subtype"]).lower() if pd.notna(row["subtype"]) else "-")
    return type_map.get(key, 0.5)

assets["V0"] = assets.apply(get_V0, axis=1)
assets["age_factor"] = 2025 - assets["commission_year"]
assets["maintenance"] = 0.7
assets["V"] = np.clip(
    assets["V0"] * (1 + 0.3 * assets["age_factor"] / 50) * (1 - 0.4 * assets["maintenance"]),
    0, 1
)
assets.to_csv(f"{input_dir}/vulnerability_matrix.csv", index=False)
print(f"✅ Vulnerability mentve ({len(assets)} asset)")

# --- 4️⃣ r VALUES ENHANCED ---
print("\n🧮 r értékek számítása...")
merged = []
for _, hz_row in hazard_all.iterrows():
    region = hz_row["region"]
    hz = hz_row["hazard"]
    year = hz_row["year"]
    subset = assets[assets["country"].str.lower().str.contains(region[:2], na=False)]
    for _, a in subset.iterrows():
        r_val = hz_row["H_norm"] * hz_row["P_norm"] * hz_row["E"] * a["V"]
        merged.append({
            "asset_id": a["asset_id"],
            "hazard": hz,
            "region": region,
            "year": year,
            "H_norm": hz_row["H_norm"],
            "P_norm": hz_row["P_norm"],
            "E": hz_row["E"],
            "V": a["V"],
            "r": r_val
        })

r_df = pd.DataFrame(merged)
r_df.to_csv(f"{input_dir}/r_values_enhanced.csv", index=False)
print(f"✅ Mentve: {input_dir}/r_values_enhanced.csv ({len(r_df)} sor)")

print("\n🎯 KÉSZ – Minden lépés sikeresen lefutott.")


🔧 MICE imputáció indul...
✅ Imputált: /content/cpri_outputs/usa_daily_imputed.csv (8766 sor)
✅ Imputált: /content/cpri_outputs/nemet_daily_imputed.csv (8766 sor)
✅ Imputált: /content/cpri_outputs/tajvan_daily_imputed.csv (8766 sor)
✅ Imputált: /content/cpri_outputs/dania_daily_imputed.csv (8766 sor)
✅ Imputált: /content/cpri_outputs/uk_daily_imputed.csv (8766 sor)

🌪 Hazard deriválás folyamatban...


TypeError: agg function failed [how->mean,dtype->object]

In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
import pandas as pd, numpy as np, os

input_dir = "/content/cpri_outputs"
imputer = IterativeImputer(random_state=42, max_iter=10)

for f in os.listdir(input_dir):
    if f.endswith("_daily_merged.csv"):
        df = pd.read_csv(f"{input_dir}/{f}")
        num = df.select_dtypes(include=[np.number])
        if not num.empty:
            num_imputed = imputer.fit_transform(num)
            df[num.columns] = num_imputed
        df.to_csv(f"{input_dir}/{f.replace('_merged.csv', '_imputed.csv')}", index=False)
        print(f"✅ {f} → imputált fájl mentve.")


✅ usa_daily_merged.csv → imputált fájl mentve.
✅ nemet_daily_merged.csv → imputált fájl mentve.
✅ tajvan_daily_merged.csv → imputált fájl mentve.
✅ dania_daily_merged.csv → imputált fájl mentve.
✅ uk_daily_merged.csv → imputált fájl mentve.


In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# --- Mappák ---
input_dir = "/content/cpri_outputs"
os.makedirs(input_dir, exist_ok=True)

# --- MICE imputáló előkészítése ---
imputer = IterativeImputer(random_state=42, max_iter=10, sample_posterior=True)

# --- Fájlok keresése ---
files = [f for f in os.listdir(input_dir) if f.endswith("_daily_merged.csv")]
if not files:
    print("❌ Nincsenek *_daily_merged.csv fájlok a cpri_outputs mappában!")
else:
    print(f"📂 Talált fájlok: {files}")

# --- Minden fájl feldolgozása ---
for f in files:
    file_path = os.path.join(input_dir, f)
    try:
        print(f"\n🔧 Feldolgozás: {f}")
        df = pd.read_csv(file_path)

        # Csak numerikus oszlopok
        num_cols = df.select_dtypes(include=[np.number]).columns
        if len(num_cols) == 0:
            print(f"⚠️ Nincs numerikus adat: {f}")
            continue

        # MICE imputálás
        imputed = imputer.fit_transform(df[num_cols])
        df[num_cols] = imputed

        out_path = os.path.join(input_dir, f.replace("_merged.csv", "_imputed.csv"))
        df.to_csv(out_path, index=False)
        print(f"✅ Imputált fájl mentve: {out_path}")

    except Exception as e:
        print(f"💀 Hiba a {f} fájlnál: {e}")

print("\n🏁 KÉSZ — nézd meg a cpri_outputs mappát, ott lesznek az új *_daily_imputed.csv fájlok.")


📂 Talált fájlok: ['usa_daily_merged.csv', 'nemet_daily_merged.csv', 'tajvan_daily_merged.csv', 'dania_daily_merged.csv', 'uk_daily_merged.csv']

🔧 Feldolgozás: usa_daily_merged.csv
✅ Imputált fájl mentve: /content/cpri_outputs/usa_daily_imputed.csv

🔧 Feldolgozás: nemet_daily_merged.csv
✅ Imputált fájl mentve: /content/cpri_outputs/nemet_daily_imputed.csv

🔧 Feldolgozás: tajvan_daily_merged.csv
✅ Imputált fájl mentve: /content/cpri_outputs/tajvan_daily_imputed.csv

🔧 Feldolgozás: dania_daily_merged.csv
✅ Imputált fájl mentve: /content/cpri_outputs/dania_daily_imputed.csv

🔧 Feldolgozás: uk_daily_merged.csv
✅ Imputált fájl mentve: /content/cpri_outputs/uk_daily_imputed.csv

🏁 KÉSZ — nézd meg a cpri_outputs mappát, ott lesznek az új *_daily_imputed.csv fájlok.


In [None]:
# Feltételezve, hogy df a napi adatokat tartalmazza
df['year'] = pd.to_datetime(df['date']).dt.year

# Csak numerikus oszlopokat hagyunk meg
num_cols = df.select_dtypes(include=[np.number]).columns.tolist() + ['year']
df = df[num_cols]

# Éves aggregálás
df_yearly = df.groupby('year').mean().reset_index()


ValueError: Grouper for 'year' not 1-dimensional

In [None]:
# --- 📦 Könyvtárak ---
import pandas as pd
import numpy as np
import glob, os

# --- 📁 Beállítások ---
input_dir = "/content/cpri_outputs"
os.makedirs(input_dir, exist_ok=True)

# --- 🌍 Régió-súlyok ---
region_weights = {
    "dania": 0.8, "denmark": 0.8,
    "nemet": 0.6, "germany": 0.6,
    "uk": 0.9, "netherlands": 0.9, "holland": 0.9,
    "usa": 1.0, "united states": 1.0,
    "tajvan": 1.0, "taiwan": 1.0
}

# --- ⚙️ Hazard változók ---
hazard_vars = {
    "flood": "tp_sum_mean",           # csapadék
    "heatwave": "t2m_mean_mean",      # hőmérséklet
    "windstorm": "u10_max_mean",      # szélsebesség
    "coldwave": "sd_mean_mean",       # hó/fagy
    "solaranomaly": "ssrd_mean_mean"  # napfény-anomália
}

# --- 🧮 Éves aggregálás minden daily fájlra ---
files = glob.glob(f"{input_dir}/*_daily_imputed.csv") or glob.glob(f"{input_dir}/*_daily_merged.csv")
if not files:
    raise FileNotFoundError("⚠️ Nincs napi CSV fájl a megadott mappában!")

hazard_dfs = []

for file in files:
    region = os.path.basename(file).split("_")[0].lower()
    print(f"\n📂 Feldolgozás: {region}")

    df = pd.read_csv(file)

    # dátum és év kinyerése
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    df = df.dropna(subset=['date'])
    df['year'] = df['date'].dt.year

    # numerikus oszlopok
    num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    if 'year' not in num_cols:
        num_cols.append('year')
    df = df[num_cols]

    # éves átlagolás
    df_yearly = df.groupby('year').mean(numeric_only=True).reset_index()

    # hazard-specifikus számítás
    for hz, col in hazard_vars.items():
        if col not in df_yearly.columns:
            continue

        # nyers értékek
        H_raw = df_yearly[col].mean(skipna=True)
        threshold = df_yearly[col].quantile(0.95)
        P_raw = (df_yearly[col] > threshold).sum() / len(df_yearly)

        # normalizálás
        df_yearly[f"{hz}_H_norm"] = (df_yearly[col] - df_yearly[col].min()) / (df_yearly[col].max() - df_yearly[col].min() + 1e-9)
        df_yearly[f"{hz}_P_norm"] = np.clip(P_raw, 0, 1)

        # regionális súlyozás
        weight = region_weights.get(region, 1.0)
        df_yearly['E'] = (df_yearly[f"{hz}_H_norm"] + df_yearly[f"{hz}_P_norm"]) / 2 * weight
        df_yearly['hazard'] = hz
        df_yearly['region'] = region

        hazard_dfs.append(df_yearly[['hazard', 'region', 'year', f"{hz}_H_norm", f"{hz}_P_norm", 'E']])

# --- 📊 Összefűzés ---
hazard_all = pd.concat(hazard_dfs, ignore_index=True)
hazard_all.to_csv(f"{input_dir}/exposure_matrix.csv", index=False)
print(f"\n✅ Exposure matrix mentve: {len(hazard_all)} sor")

# --- 🧱 Assetek betöltése ---
assets_path = f"{input_dir}/assets.xlsx"
if not os.path.exists(assets_path):
    raise FileNotFoundError(f"⚠️ Hiányzik az asset fájl: {assets_path}")

assets = pd.read_excel(assets_path)

# --- 💀 Sérülékenység (V) számítása ---
def get_V0(row):
    type_map = {
        ("wind", "offshore"): 0.6, ("wind", "onshore"): 0.4,
        ("solar", "pv"): 0.3, ("hydro", "-"): 0.5,
        ("biomass", "-"): 0.4, ("gas", "-"): 0.7,
        ("coal", "-"): 0.7, ("nuclear", "-"): 0.8,
        ("hydrogen", "-"): 0.9, ("storage", "battery"): 0.7,
        ("grid", "transmission"): 0.6,
    }
    return type_map.get((str(row['type']).lower(), str(row['subtype']).lower()), 0.5)

assets["V0"] = assets.apply(get_V0, axis=1)
assets["age_factor"] = 2025 - assets["commission_year"]
assets["maintenance"] = 0.7
assets["V"] = np.clip(
    assets["V0"] * (1 + 0.3 * assets["age_factor"] / 50) * (1 - 0.4 * assets["maintenance"]),
    0, 1
)
assets.to_csv(f"{input_dir}/vulnerability_matrix.csv", index=False)
print(f"✅ Vulnerability matrix mentve ({len(assets)} sor)")

# --- ⚡ r értékek számítása ---
results = []
for _, a in assets.iterrows():
    reg = a["country"].lower()
    subset = hazard_all[hazard_all["region"].str.contains(reg[:3], na=False)]
    for _, s in subset.iterrows():
        H_cols = [c for c in s.index if "H_norm" in c]
        P_cols = [c for c in s.index if "P_norm" in c]
        H = s[H_cols[0]] if H_cols else np.nan
        P = s[P_cols[0]] if P_cols else np.nan
        E = s["E"]
        V = a["V"]
        r = H * P * E * V if not pd.isna(H) and not pd.isna(P) else 0
        results.append({
            "asset_id": a["asset_id"],
            "hazard": s["hazard"],
            "region": s["region"],
            "year": s["year"],
            "H_norm": H,
            "P_norm": P,
            "E": E,
            "V": V,
            "r": r
        })

df_r = pd.DataFrame(results)
df_r.to_csv(f"{input_dir}/r_values_enhanced.csv", index=False)
print(f"\n✅ Mentve: {input_dir}/r_values_enhanced.csv ({len(df_r)} sor)")

# --- 📈 Diagnosztika ---
print("\n--- Diagnosztika ---")
for hz in df_r['hazard'].unique():
    sub = df_r[df_r['hazard'] == hz]
    total = len(sub)
    zeros = (sub['r'] == 0).sum()
    print(f"{hz:15} → {zeros}/{total} = {zeros/total:.1%} nullás érték")

print("\n🏁 Kész, minden lépés sikeresen lefutott!")



📂 Feldolgozás: dania

📂 Feldolgozás: usa

📂 Feldolgozás: uk

📂 Feldolgozás: nemet

📂 Feldolgozás: tajvan

✅ Exposure matrix mentve: 528 sor
✅ Vulnerability matrix mentve (26 sor)

✅ Mentve: /content/cpri_outputs/r_values_enhanced.csv (1008 sor)

--- Diagnosztika ---
flood           → 9/216 = 4.2% nullás érték
heatwave        → 216/216 = 100.0% nullás érték
windstorm       → 216/216 = 100.0% nullás érték
coldwave        → 144/144 = 100.0% nullás érték
solaranomaly    → 216/216 = 100.0% nullás érték

🏁 Kész, minden lépés sikeresen lefutott!
