<a href="https://colab.research.google.com/github/esb-index/Barka-AV/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!mkdir -p /content/era5_processed_light && mv /content/*_daily_light.csv /content/era5_processed_light/


In [None]:
# --- 1️⃣ Importok ---
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from scipy.stats.mstats import winsorize

# --- 2️⃣ Beállítások ---
INPUT_PATH = "/content/"
OUTPUT_PATH = "/content/cpri_outputs/"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# --- 3️⃣ Országok beolvasása ---
files = [f for f in os.listdir(INPUT_PATH) if f.endswith("_daily_light.csv")]
print("Talált napi fájlok:", files)

# --- 4️⃣ Helper: winsorizálás + normalizálás ---
def winsorize_series(s):
    return winsorize(s, limits=[0.01, 0.01])

def normalize_series(s):
    return (s - s.min()) / (s.max() - s.min()) if s.max() != s.min() else s*0

# --- 5️⃣ Feldolgozás ---
yearly_records = {"heatwave": [], "flood": [], "windstorm": []}

for f in tqdm(files):
    df = pd.read_csv(os.path.join(INPUT_PATH, f))
    country = f.split("_")[0].lower()

    # biztosítjuk, hogy legyen 'date' és 't2m', 'tp', 'u10', 'v10' oszlop
    if 'date' not in df.columns:
        df.rename(columns={c: 'date' for c in df.columns if 'time' in c.lower()}, inplace=True)
    df['date'] = pd.to_datetime(df['date'])
    df['year'] = df['date'].dt.year

    #  --- HEATWAVE: 2m_temp > 90. percentil ---
    if 't2m' in df.columns:
        t2m = df['t2m']
        thresh = np.nanpercentile(t2m, 90)
        df['heatwave_flag'] = (t2m > thresh).astype(int)
        hw_yearly = df.groupby('year').agg(
            H_raw=('t2m', 'mean'),
            P_raw=('heatwave_flag', 'sum')
        ).reset_index()
        hw_yearly['region'] = country
        yearly_records['heatwave'].append(hw_yearly)

    # --- FLOOD: total_precipitation > 95. percentil ---
    if 'tp' in df.columns:
        tp = df['tp']
        thresh = np.nanpercentile(tp, 95)
        df['flood_flag'] = (tp > thresh).astype(int)
        fl_yearly = df.groupby('year').agg(
            H_raw=('tp', 'sum'),
            P_raw=('flood_flag', 'sum')
        ).reset_index()
        fl_yearly['region'] = country
        yearly_records['flood'].append(fl_yearly)

    # --- WINDSTORM: 10m szélsebesség (sqrt(u10²+v10²)) > 95. percentil ---
    if {'u10','v10'}.issubset(df.columns):
        df['wind_speed'] = np.sqrt(df['u10']**2 + df['v10']**2)
        thresh = np.nanpercentile(df['wind_speed'], 95)
        df['storm_flag'] = (df['wind_speed'] > thresh).astype(int)
        ws_yearly = df.groupby('year').agg(
            H_raw=('wind_speed', 'mean'),
            P_raw=('storm_flag', 'sum')
        ).reset_index()
        ws_yearly['region'] = country
        yearly_records['windstorm'].append(ws_yearly)

# --- 6️⃣ Összesítés, winsorizálás és normalizálás ---
for hazard, dfs in yearly_records.items():
    if len(dfs) == 0:
        continue
    df_all = pd.concat(dfs, ignore_index=True)
    df_all['H_win'] = winsorize_series(df_all['H_raw'])
    df_all['P_win'] = winsorize_series(df_all['P_raw'])
    df_all['H_norm'] = normalize_series(df_all['H_win'])
    df_all['P_norm'] = normalize_series(df_all['P_win'])
    df_all.to_csv(os.path.join(OUTPUT_PATH, f"hazard_yearly_{hazard}.csv"), index=False)
    print(f"✅ Mentve: hazard_yearly_{hazard}.csv ({df_all.shape[0]} sor)")

print("\n🎯 Kész: összes hazard újragenerálva a 'cpri_outputs' mappába!")


Talált napi fájlok: ['uk_daily_light.csv', 'nemet_daily_light.csv', 'usa_daily_light.csv', 'tajvan_daily_light.csv', 'dania_daily_light.csv']


100%|██████████| 5/5 [00:00<00:00, 18.87it/s]


🎯 Kész: összes hazard újragenerálva a 'cpri_outputs' mappába!





In [None]:
import os
print("📂 Aktuális tartalom a /content-ben:")
for f in os.listdir("/content"):
    print(" -", f)


📂 Aktuális tartalom a /content-ben:
 - .config
 - uk_daily_light.csv
 - nemet_daily_light.csv
 - usa_daily_light.csv
 - cpri_outputs
 - tajvan_daily_light.csv
 - dania_daily_light.csv
 - sample_data


In [None]:
import pandas as pd

df = pd.read_csv("/content/dania_daily_light.csv")
print("Sorok száma:", len(df))
print("Oszlopok:", list(df.columns))
print(df.head(3))


Sorok száma: 17532
Oszlopok: ['date', 'tp_sum_mean', 'ssrd_mean_mean', 'u10_max_mean', 'v10_max_mean', 't2m_mean_mean', 'msl_mean_mean', 'sst_mean_mean', 'sd_mean_mean']
         date  tp_sum_mean  ssrd_mean_mean  u10_max_mean  v10_max_mean  \
0  2000-01-01     0.000268       266652.60           NaN           NaN   
1  2000-01-01          NaN             NaN       4.70326       0.92689   
2  2000-01-02     0.000019       190477.27           NaN           NaN   

   t2m_mean_mean  msl_mean_mean  sst_mean_mean  sd_mean_mean  
0            NaN            NaN            NaN           NaN  
1      278.23892      101576.28         277.97       0.00017  
2            NaN            NaN            NaN           NaN  


In [None]:
import pandas as pd
import numpy as np
import glob, os

# --- Segédfüggvény a winsorizáláshoz ---
def winsorize_series(s, low=1, high=99):
    lowv, highv = np.nanpercentile(s, [low, high])
    return np.clip(s, lowv, highv)

# --- Input fájlok keresése ---
files = glob.glob("/content/*_daily_light.csv")
print(f"🔍 Talált fájlok: {len(files)}")

results = []

for f in files:
    region = os.path.basename(f).replace("_daily_light.csv", "")
    print(f"\n🌍 Feldolgozás: {region}")

    df = pd.read_csv(f, parse_dates=['date'])
    df['year'] = pd.to_datetime(df['date']).dt.year

    # --- Az oszlopnevek automatikus felismerése ---
    tp_col = [c for c in df.columns if 'tp' in c][0]
    t2m_col = [c for c in df.columns if 't2m' in c][0]
    u_col = [c for c in df.columns if 'u10' in c][0]
    v_col = [c for c in df.columns if 'v10' in c][0]

    # --- WINDSTORM ---
    df['wind_speed'] = np.sqrt(df[u_col]**2 + df[v_col]**2)
    df_ws = df.groupby('year')['wind_speed'].agg(['mean', 'max', 'count']).reset_index()
    df_ws['H_raw'] = df_ws['max']
    df_ws['P_raw'] = df_ws['mean']
    df_ws['hazard'] = 'windstorm'
    results.append(df_ws.assign(region=region))

    # --- HEATWAVE ---
    df['t2m_C'] = df[t2m_col] - 273.15
    threshold = np.nanpercentile(df['t2m_C'], 95)
    df['is_hot'] = df['t2m_C'] > threshold
    df_hw = df.groupby('year')['is_hot'].sum().reset_index()
    df_hw['H_raw'] = threshold
    df_hw['P_raw'] = df_hw['is_hot']
    df_hw['hazard'] = 'heatwave'
    results.append(df_hw.assign(region=region))

    # --- FLOOD (precipitation) ---
    df_fl = df.groupby('year')[tp_col].sum().reset_index()
    df_fl['H_raw'] = df_fl[tp_col]
    df_fl['P_raw'] = (df_fl[tp_col] > np.nanpercentile(df_fl[tp_col], 90)).astype(int)
    df_fl['hazard'] = 'flood'
    results.append(df_fl.assign(region=region))

# --- Egyesítés és normalizálás ---
all_df = pd.concat(results, ignore_index=True)
all_df['H_win'] = all_df.groupby('hazard')['H_raw'].transform(winsorize_series)
all_df['P_win'] = all_df.groupby('hazard')['P_raw'].transform(winsorize_series)

# Normalizálás hazardonként (min–max)
def normalize(g):
    g['H_norm'] = (g['H_win'] - g['H_win'].min()) / (g['H_win'].max() - g['H_win'].min())
    g['P_norm'] = (g['P_win'] - g['P_win'].min()) / (g['P_win'].max() - g['P_win'].min())
    return g

all_df = all_df.groupby('hazard', group_keys=False).apply(normalize)

# --- Kimenetek külön fájlba ---
os.makedirs("/content/cpri_outputs", exist_ok=True)
for hazard in ['flood', 'windstorm', 'heatwave']:
    subset = all_df[all_df['hazard'] == hazard]
    out_path = f"/content/cpri_outputs/hazard_yearly_{hazard}.csv"
    subset.to_csv(out_path, index=False)
    print(f"✅ Mentve: {out_path} ({len(subset)} sor)")

print("\n🎯 Minden hazard fájl sikeresen elkészült!")


🔍 Talált fájlok: 5

🌍 Feldolgozás: uk

🌍 Feldolgozás: nemet

🌍 Feldolgozás: usa

🌍 Feldolgozás: tajvan

🌍 Feldolgozás: dania
✅ Mentve: /content/cpri_outputs/hazard_yearly_flood.csv (120 sor)
✅ Mentve: /content/cpri_outputs/hazard_yearly_windstorm.csv (120 sor)
✅ Mentve: /content/cpri_outputs/hazard_yearly_heatwave.csv (120 sor)

🎯 Minden hazard fájl sikeresen elkészült!


  all_df = all_df.groupby('hazard', group_keys=False).apply(normalize)


In [None]:
import pandas as pd
import numpy as np
import glob, os

# --- Beállítások ---
ASSETS_XLSX = "/content/assets.xlsx"
HAZARD_PATH = "/content/cpri_outputs/"
OUTPUT_PATH = "/content/cpri_outputs/"

# --- 1️⃣ Assetek beolvasása ---
assets = pd.read_excel(ASSETS_XLSX)
assets.rename(columns={
    'latitude': 'Latitude',
    'longitude': 'Longitude'
}, inplace=True)

assets['Latitude'] = pd.to_numeric(assets['Latitude'], errors='coerce')
assets['Longitude'] = pd.to_numeric(assets['Longitude'], errors='coerce')

print(f"📊 Beolvasott assetek: {assets.shape[0]} db")

# --- 2️⃣ Eredménylista ---
results = []

# --- 3️⃣ Hazard fájlok bejárása ---
hazard_files = glob.glob(os.path.join(HAZARD_PATH, "hazard_yearly_*.csv"))

for hf in hazard_files:
    hazard_name = os.path.basename(hf).replace("hazard_yearly_", "").replace(".csv", "")
    df = pd.read_csv(hf)

    print(f"\n🌍 Feldolgozás: {hazard_name} — {len(df)} sor")

    # minden régióhoz kapcsoljuk az ottani asseteket

# Országnevek leképezése a hazard-fájlok rövid neveire
region_map = {
    "denmark": "dania",
    "uk": "uk",
    "united kingdom": "uk",
    "germany": "nemet",
    "netherlands": "uk",  # holland adatok az UK fájlban voltak
    "taiwan": "tajvan",
    "usa": "usa",
    "united states": "usa",
    "us": "usa"
}

for _, a in assets.iterrows():
    country = a['country'].strip().lower()
    region = region_map.get(country, country)

    subset = df[df['region'].str.lower() == region]
    if subset.empty:
            continue

    for _, s in subset.iterrows():
            year = s['year']
            H = s['H_norm']
            P = s['P_norm']
            E = 1.0  # ideiglenes expozíció
            V = 0.5  # baseline sérülékenység
            r = H * P * E * V
            results.append({
                'asset_id': a['asset_id'],
                'hazard': hazard_name,
                'year': year,
                'H_norm': H,
                'P_norm': P,
                'E': E,
                'V': V,
                'r': r
            })

# --- 4️⃣ Eredmény mentése ---
df_r = pd.DataFrame(results)
os.makedirs(OUTPUT_PATH, exist_ok=True)
out_path = os.path.join(OUTPUT_PATH, "r_values.csv")
df_r.to_csv(out_path, index=False)

print(f"\n✅ Mentve: {out_path}")
print(f"📈 Összes sor: {df_r.shape[0]}")
print(df_r.head(10))


📊 Beolvasott assetek: 20 db

🌍 Feldolgozás: windstorm — 120 sor

🌍 Feldolgozás: heatwave — 120 sor

🌍 Feldolgozás: flood — 120 sor

✅ Mentve: /content/cpri_outputs/r_values.csv
📈 Összes sor: 480
  asset_id hazard  year    H_norm  P_norm    E    V    r
0   DK-001  flood  2000  0.340112     0.0  1.0  0.5  0.0
1   DK-001  flood  2001  0.255916     0.0  1.0  0.5  0.0
2   DK-001  flood  2002  0.199476     0.0  1.0  0.5  0.0
3   DK-001  flood  2003  0.118458     0.0  1.0  0.5  0.0
4   DK-001  flood  2004  0.255484     0.0  1.0  0.5  0.0
5   DK-001  flood  2005  0.130603     0.0  1.0  0.5  0.0
6   DK-001  flood  2006  0.321203     0.0  1.0  0.5  0.0
7   DK-001  flood  2007  0.281886     0.0  1.0  0.5  0.0
8   DK-001  flood  2008  0.239728     0.0  1.0  0.5  0.0
9   DK-001  flood  2009  0.200225     0.0  1.0  0.5  0.0


In [None]:
import pandas as pd
import numpy as np
import os

# --- Beolvasás ---
df = pd.read_csv("/content/cpri_outputs/r_values.csv")

# --- Biztos ami biztos ---
df['r'] = pd.to_numeric(df['r'], errors='coerce')

# --- 1️⃣ Éves CPRI per asset-hazard ---
agg = df.groupby(['asset_id', 'hazard', 'year'], as_index=False)['r'].mean()

# --- 2️⃣ Összesített CPRI per asset (hazardok átlagával) ---
cpri = agg.groupby(['asset_id', 'year'], as_index=False)['r'].mean()
cpri.rename(columns={'r': 'CPRI_index'}, inplace=True)

# --- 3️⃣ Normalizálás (0–1 skálán minden évre külön) ---
cpri['CPRI_norm'] = cpri.groupby('year')['CPRI_index'].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

# --- 4️⃣ Kimenet mentése ---
out_path = "/content/cpri_outputs/cpri_index.csv"
cpri.to_csv(out_path, index=False)

print(f"✅ Mentve: {out_path}")
print(f"📈 Sorok száma: {cpri.shape[0]}")
print("\n📊 Minta:")
print(cpri.head(10))


✅ Mentve: /content/cpri_outputs/cpri_index.csv
📈 Sorok száma: 480

📊 Minta:
  asset_id  year  CPRI_index  CPRI_norm
0   DE-001  2000         0.0        0.0
1   DE-001  2001         0.0        NaN
2   DE-001  2002         0.0        NaN
3   DE-001  2003         0.0        NaN
4   DE-001  2004         0.0        NaN
5   DE-001  2005         0.0        NaN
6   DE-001  2006         0.0        0.0
7   DE-001  2007         0.0        NaN
8   DE-001  2008         0.0        NaN
9   DE-001  2009         0.0        0.0


In [None]:
import pandas as pd

# Német hazard fájlok megnézése
for hazard in ['flood', 'windstorm', 'heatwave']:
    df = pd.read_csv(f"/content/cpri_outputs/hazard_yearly_{hazard}.csv")
    print(f"\n=== {hazard.upper()} — példasorok Németországra ===")
    print(df[df['region'] == 'nemet'].head(10))



=== FLOOD — példasorok Németországra ===
    year  mean  max  count     H_raw  P_raw hazard region  is_hot  \
24  2000   NaN  NaN    NaN  0.036424    0.0  flood  nemet     NaN   
25  2001   NaN  NaN    NaN  0.039963    0.0  flood  nemet     NaN   
26  2002   NaN  NaN    NaN  0.036833    0.0  flood  nemet     NaN   
27  2003   NaN  NaN    NaN  0.023013    0.0  flood  nemet     NaN   
28  2004   NaN  NaN    NaN  0.039967    0.0  flood  nemet     NaN   
29  2005   NaN  NaN    NaN  0.022228    0.0  flood  nemet     NaN   
30  2006   NaN  NaN    NaN  0.034454    0.0  flood  nemet     NaN   
31  2007   NaN  NaN    NaN  0.036744    0.0  flood  nemet     NaN   
32  2008   NaN  NaN    NaN  0.033625    0.0  flood  nemet     NaN   
33  2009   NaN  NaN    NaN  0.040384    0.0  flood  nemet     NaN   

    tp_sum_mean     H_win  P_win    H_norm  P_norm  
24     0.036424  0.036424    0.0  0.226529     0.0  
25     0.039963  0.039963    0.0  0.283613     0.0  
26     0.036833  0.036833    0.0  0.233

In [None]:
import pandas as pd
import glob
import os

# --- A CSV-k elérési útjai ---
paths = [
    "/content/cpri_outputs/hazard_yearly_flood.csv",
    "/content/cpri_outputs/hazard_yearly_windstorm.csv",
    "/content/cpri_outputs/hazard_yearly_heatwave.csv",
    "/content/cpri_outputs/r_values.csv",
    "/content/cpri_outputs/cpri_index.csv"
]

combined = []

# --- Összefűzés, csak ha a fájl létezik ---
for p in paths:
    if os.path.exists(p):
        df = pd.read_csv(p)
        df['source'] = os.path.basename(p)  # új oszlop: forrás
        combined.append(df)
    else:
        print(f"⚠️ Hiányzik: {p}")

if combined:
    df_all = pd.concat(combined, ignore_index=True)
    out_path = "/content/combined_cpri_data.csv"
    df_all.to_csv(out_path, index=False)
    print(f"✅ Összefűzve: {out_path}")
    print(f"📈 Összes sor: {len(df_all)}")
else:
    print("❌ Nem találtam egyetlen forrást sem!")


✅ Összefűzve: /content/combined_cpri_data.csv
📈 Összes sor: 1320


In [None]:
import pandas as pd

df = pd.read_csv("/content/combined_cpri_data.csv")

print("Sorok száma:", len(df))
print("Oszlopnevek:", list(df.columns))
print("\nForrásfájlok:", df['source'].unique())
print("\nElső 5 sor:")
print(df.head(5))


Sorok száma: 1320
Oszlopnevek: ['year', 'mean', 'max', 'count', 'H_raw', 'P_raw', 'hazard', 'region', 'is_hot', 'tp_sum_mean', 'H_win', 'P_win', 'H_norm', 'P_norm', 'source', 'asset_id', 'E', 'V', 'r', 'CPRI_index', 'CPRI_norm']

Forrásfájlok: ['hazard_yearly_flood.csv' 'hazard_yearly_windstorm.csv'
 'hazard_yearly_heatwave.csv' 'r_values.csv' 'cpri_index.csv']

Első 5 sor:
   year  mean  max  count     H_raw  P_raw hazard region  is_hot  tp_sum_mean  \
0  2000   NaN  NaN    NaN  0.045559    1.0  flood     uk     NaN     0.045559   
1  2001   NaN  NaN    NaN  0.038407    0.0  flood     uk     NaN     0.038407   
2  2002   NaN  NaN    NaN  0.042355    0.0  flood     uk     NaN     0.042355   
3  2003   NaN  NaN    NaN  0.033754    0.0  flood     uk     NaN     0.033754   
4  2004   NaN  NaN    NaN  0.038914    0.0  flood     uk     NaN     0.038914   

   ...  P_win    H_norm  P_norm                   source asset_id   E   V   r  \
0  ...    1.0  0.373858     1.0  hazard_yearly_flood.cs

In [None]:
import pandas as pd
import numpy as np
import os

# --- Fájlok elérési útja ---
ASSETS_XLSX = "/content/assets.xlsx"
COMBINED_CSV = "/content/combined_cpri_data.csv"
OUTPUT_PATH = "/content/cpri_outputs"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# --- 1️⃣ Beolvasás ---
assets = pd.read_excel(ASSETS_XLSX)
assets.rename(columns={'latitude': 'Latitude', 'longitude': 'Longitude'}, inplace=True)
assets['country'] = assets['country'].str.strip().str.lower()

data = pd.read_csv(COMBINED_CSV)

print(f"Beolvasott assetek: {assets.shape[0]}, adatsorok: {data.shape[0]}")

# --- 2️⃣ Országnév-leképezés a hazard régiókra ---
region_map = {
    "denmark": "dania",
    "germany": "nemet",
    "netherlands": "uk",   # holland adatok az UK-fájlban voltak
    "uk": "uk",
    "united kingdom": "uk",
    "taiwan": "tajvan",
    "usa": "usa",
    "united states": "usa",
    "us": "usa"
}

# --- 3️⃣ Szűrés csak a hazard adatokra ---
hazard_df = data[data['source'].str.contains("hazard_yearly")].copy()
hazard_df = hazard_df[['hazard','region','year','H_norm','P_norm']].dropna(subset=['H_norm','P_norm'])
print("Hazard-régiók:", hazard_df['region'].unique())

# --- 4️⃣ Új r_values táblázat létrehozása ---
results = []
for _, a in assets.iterrows():
    country = a['country']
    region = region_map.get(country, country)
    subset = hazard_df[hazard_df['region'].str.lower() == region]
    if subset.empty:
        continue

    for _, s in subset.iterrows():
        r = s['H_norm'] * s['P_norm'] * 1.0 * 0.5
        results.append({
            'asset_id': a['asset_id'],
            'hazard': s['hazard'],
            'year': int(s['year']),
            'H_norm': s['H_norm'],
            'P_norm': s['P_norm'],
            'E': 1.0,
            'V': 0.5,
            'r': r
        })

r_df = pd.DataFrame(results)
r_df.to_csv(f"{OUTPUT_PATH}/r_values_rebuilt.csv", index=False)
print(f"✅ Új r_values_rebuilt.csv mentve ({len(r_df)} sor)")

# --- 5️⃣ CPRI index újraszámítása ---
agg = r_df.groupby(['asset_id','hazard','year'], as_index=False)['r'].mean()
cpri = agg.groupby(['asset_id','year'], as_index=False)['r'].mean()
cpri.rename(columns={'r': 'CPRI_index'}, inplace=True)

# normalizálás év szerint
cpri['CPRI_norm'] = cpri.groupby('year')['CPRI_index'].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

out_path = f"{OUTPUT_PATH}/cpri_index_rebuilt.csv"
cpri.to_csv(out_path, index=False)
print(f"✅ Új cpri_index_rebuilt.csv mentve ({len(cpri)} sor)")
print(cpri.head(10))


Beolvasott assetek: 20, adatsorok: 1320
Hazard-régiók: ['uk' 'nemet' 'usa' 'tajvan' 'dania']
✅ Új r_values_rebuilt.csv mentve (1438 sor)
✅ Új cpri_index_rebuilt.csv mentve (480 sor)
  asset_id  year  CPRI_index  CPRI_norm
0   DE-001  2000    0.136033    1.00000
1   DE-001  2001    0.096679    1.00000
2   DE-001  2002    0.144593    1.00000
3   DE-001  2003    0.099303    1.00000
4   DE-001  2004    0.111654    1.00000
5   DE-001  2005    0.158265    1.00000
6   DE-001  2006    0.109066    0.38377
7   DE-001  2007    0.146028    1.00000
8   DE-001  2008    0.141180    1.00000
9   DE-001  2009    0.148049    0.83310
