<a href="https://colab.research.google.com/github/esb-index/Barka-AV/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab: process ERA5 annual Excel -> hazard proxies workbook
!pip install xarray pandas numpy openpyxl --quiet

import pandas as pd, numpy as np, os, io
from google.colab import files

# --- Upload and load the processed ERA5 annual workbook ---------------------
# The workbook must provide the columns listed in `required` below.
print("👉 Kérlek töltsd fel az ERA5 feldolgozott Excel fájlodat (pl. ERA5_Hungary_2000_2022_processed.xlsx).")
uploaded = files.upload()  # opens the Colab picker for a local file

# If nothing was uploaded (e.g. the dialog was dismissed), stop with a readable
# message instead of failing on the filename lookup below.
if not uploaded:
    raise SystemExit("Nincs feltöltött fájl. Futtasd újra a cellát, és válassz ki egy Excel fájlt.")

# Name of the uploaded file (only the first entry is used)
fname = list(uploaded.keys())[0]
print("Feltöltve:", fname)

# Load the workbook
df = pd.read_excel(fname)

# Validate the required columns BEFORE any of them is used, so a missing
# column produces the explanatory message rather than a raw KeyError.
required = {"year","mean_temp","max_temp","total_precip","heatwave_days"}
if not required.issubset(set(df.columns)):
    raise SystemExit(f"Hiányzó oszlop(ok) az inputban. Vártam: {required}. Találtam: {set(df.columns)}")

# Chronological order with a clean 0..n-1 index
df = df.sort_values("year").reset_index(drop=True)

years = df['year'].tolist()

# Hazard identifiers covered by the output workbook
hazards = [
    "heatwave", "drought", "flood", "windstorm", "wildfire",
    "extreme_cold", "geomagnetic", "earthquake", "pandemic", "geopolitical",
]


def _proxy_row(year, hazard_id, intensity, frequency, source):
    """Return one hazard_raw record: intensity goes to H_raw, frequency to P_raw."""
    return {"year": year, "hazard_id": hazard_id, "H_raw": intensity, "P_raw": frequency, "source": source}


# Hazards that get no proxy from the annual inputs used here
_NO_PROXY_HAZARDS = ("geomagnetic", "earthquake", "pandemic", "geopolitical")

# Build the raw proxies: ten records (one per hazard) for every input year
raw_rows = []
for _, annual in df.iterrows():
    yr = int(annual['year'])
    t_mean = float(annual['mean_temp'])
    t_max = float(annual['max_temp'])
    precip = float(annual['total_precip'])
    hw_days = float(annual['heatwave_days'])

    raw_rows += [
        # heatwave: intensity = max_temp, frequency = heatwave_days
        _proxy_row(yr, "heatwave", t_max, hw_days, "ERA5_proxy"),
        # flood: intensity = total_precip; P_raw holds total_precip for now and
        # is replaced by the post-processing step further down
        _proxy_row(yr, "flood", precip, precip, "ERA5_proxy"),
        # drought: both fields hold total_precip for now; the post-processing
        # step further down replaces them with the deficit-based proxies
        _proxy_row(yr, "drought", precip, precip, "ERA5_proxy"),
        # windstorm: no wind data here -> missing
        _proxy_row(yr, "windstorm", np.nan, np.nan, "missing_data"),
        # wildfire: intensity = max_temp / (total_precip + 1), frequency = heatwave_days
        _proxy_row(yr, "wildfire", t_max / (precip + 1.0), hw_days, "ERA5_proxy"),
        # extreme_cold: intensity = -mean_temp (colder -> higher); P_raw is
        # filled in by the post-processing step further down
        _proxy_row(yr, "extreme_cold", -t_mean, np.nan, "ERA5_proxy"),
    ]
    # remaining hazards: flagged as missing data
    raw_rows += [
        _proxy_row(yr, name, np.nan, np.nan, "missing_data")
        for name in _NO_PROXY_HAZARDS
    ]

hazard_raw = pd.DataFrame(raw_rows)

# Post-process drought/flood/extreme_cold proxies.
# Thresholds are taken over all input years.
precip_med = df['total_precip'].median()
precip_q75 = df['total_precip'].quantile(0.75)
precip_q25 = df['total_precip'].quantile(0.25)
temp_q25 = df['mean_temp'].quantile(0.25)

# Look up each hazard_raw row's annual inputs by year in one vectorized pass
# (first record wins if a year occurs more than once) instead of re-scanning
# both frames with boolean masks for every year.
annual_by_year = df.drop_duplicates(subset='year', keep='first').set_index('year')
row_precip = hazard_raw['year'].map(annual_by_year['total_precip']).astype(float)
row_mean_temp = hazard_raw['year'].map(annual_by_year['mean_temp']).astype(float)


def _event_flag(condition, observed):
    """Return 1.0 where `condition` holds, 0.0 where it does not, NaN where `observed` is missing.

    A comparison against NaN is False, which would otherwise report a year
    with missing input as "no event" (0.0) rather than as missing.
    """
    return condition.astype(float).where(observed.notna())


is_drought = hazard_raw['hazard_id'] == 'drought'
is_flood = hazard_raw['hazard_id'] == 'flood'
is_cold = hazard_raw['hazard_id'] == 'extreme_cold'

# drought: intensity = precipitation deficit below the median (never negative),
# frequency flag = 1 when the year is drier than the 25th percentile.
# A missing precipitation value stays NaN instead of becoming a 0.0 deficit.
hazard_raw.loc[is_drought, 'H_raw'] = (precip_med - row_precip).clip(lower=0.0)[is_drought]
hazard_raw.loc[is_drought, 'P_raw'] = _event_flag(row_precip < precip_q25, row_precip)[is_drought]

# flood: frequency flag = 1 when the year is wetter than the 75th percentile
hazard_raw.loc[is_flood, 'P_raw'] = _event_flag(row_precip > precip_q75, row_precip)[is_flood]

# extreme cold: frequency flag = 1 when the year is colder than the 25th percentile
hazard_raw.loc[is_cold, 'P_raw'] = _event_flag(row_mean_temp < temp_q25, row_mean_temp)[is_cold]

# Winsorize 1-99% and min-max normalize per hazard; compute h = H_norm * P_norm
WINSOR_LOWER_Q = 0.01
WINSOR_UPPER_Q = 0.99


def _winsorize_minmax(values, lower_q=WINSOR_LOWER_Q, upper_q=WINSOR_UPPER_Q):
    """Winsorize a series at the given quantiles and min-max scale it to [0, 1].

    Parameters
    ----------
    values : pd.Series
        Raw proxy values of one hazard (may contain NaN).
    lower_q, upper_q : float
        Quantiles used as clipping bounds.

    Returns
    -------
    (pd.Series, pd.Series)
        The winsorized series and the normalized series, both on the index of
        `values`. If every value is missing, both are all-NaN. If the
        winsorized values are constant, the normalized value is 0.0 for
        observed entries; missing entries stay NaN, as they do in the
        non-constant case.
    """
    if not values.notna().any():
        all_missing = pd.Series(np.nan, index=values.index, dtype=float)
        return all_missing, all_missing.copy()

    winsorized = values.clip(values.quantile(lower_q), values.quantile(upper_q))
    lowest, highest = winsorized.min(), winsorized.max()
    if highest != lowest:
        normalized = (winsorized - lowest) / (highest - lowest)
    else:
        # Constant series: no spread to scale. Keep NaN where the input was
        # missing rather than overwriting it with 0.0.
        normalized = winsorized.where(winsorized.isna(), 0.0)
    return winsorized, normalized


proc_list = []
for hid in hazards:
    sub = hazard_raw[hazard_raw['hazard_id']==hid].sort_values('year').copy()
    # Intensity (H) and frequency (P) go through the same transformation
    sub['H_win'], sub['H_norm'] = _winsorize_minmax(sub['H_raw'])
    sub['P_win'], sub['P_norm'] = _winsorize_minmax(sub['P_raw'])
    # Combined hazard score; NaN whenever either component is missing
    sub['h'] = sub['H_norm'] * sub['P_norm']
    proc_list.append(sub)

hazard_processed = pd.concat(proc_list, ignore_index=True).sort_values(['hazard_id','year'])

# Save workbook: the annual input, the raw proxies, the processed proxies and a README sheet
outname = "ERA5_hazard_proxies_processed.xlsx"

# Report the year range that was actually processed instead of a fixed
# "2000-2022", so the note stays correct for other uploads.
year_span = f"{int(min(years))}-{int(max(years))}" if years else "n/a"
readme = pd.DataFrame([{"note": (
    "Proxy derivation: climate hazards (heatwave, drought, flood, wildfire, extreme_cold) "
    "derived from ERA5 annual aggregates. "
    "Others flagged missing (need external datasets or imputation). "
    f"Winsorize 1-99%, min-max normalization across {year_span}."
)}])

with pd.ExcelWriter(outname, engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="era5_annual_raw", index=False)
    hazard_raw.to_excel(writer, sheet_name="hazard_raw", index=False)
    hazard_processed.to_excel(writer, sheet_name="hazard_processed", index=False)
    readme.to_excel(writer, sheet_name="README", index=False)

# Tell the user the file is ready, then trigger the browser download
print("Kész! Letöltés indul: ", outname)
files.download(outname)


👉 Kérlek töltsd fel az ERA5 feldolgozott Excel fájlodat (pl. ERA5_Hungary_2000_2022_processed.xlsx).


Saving ERA5_Hungary_2000_2022_processed (1).xlsx to ERA5_Hungary_2000_2022_processed (1).xlsx
Feltöltve: ERA5_Hungary_2000_2022_processed (1).xlsx
Kész! Letöltés indul:  ERA5_hazard_proxies_processed.xlsx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>