In [54]:
import pandas as pd

# 1. SETTINGS AND CONSTANTS
USD_TO_PHP = 57.65

# 2. LOAD DATA
population_df = pd.read_csv('./datasets/raw/population.csv', index_col='Region', header=0, thousands=',')
population_df.index = population_df.index.str.strip()
population_df['Average Population'] = population_df.loc[:, '2015':'2024'].mean(axis=1)

gdp_df = pd.read_csv('./datasets/raw/gdp.csv', index_col='Region', header=0, thousands=',')
gdp_df.index = gdp_df.index.str.strip()
gdp_df['Average GDP'] = gdp_df.loc[:, '2015':'2024'].mean(axis=1)

emdat_df = pd.read_csv('./datasets/clean/emdat_ph_regions.csv', index_col='DisNo.', parse_dates=['Entry Date'])
emdat_df = emdat_df[emdat_df['Entry Date'].between('2015-01-01', '2024-12-31')]

# 3. TRANSFORM AND EXPLODE
emdat_df_exploded = emdat_df.copy()
emdat_df_exploded['PH Region'] = emdat_df_exploded['PH Regions'].str.split(';')
emdat_df_exploded = emdat_df_exploded.explode('PH Region').copy()
emdat_df_exploded['PH Region'] = emdat_df_exploded['PH Region'].str.strip()

# DISASTER FREQUENCY
disaster_risk_index_df = emdat_df_exploded.groupby('PH Region').size().to_frame(name='Disaster Count')
disaster_risk_index_df['Disaster Frequency'] = disaster_risk_index_df['Disaster Count'] / 10
df_min, df_max = disaster_risk_index_df['Disaster Frequency'].min(), disaster_risk_index_df['Disaster Frequency'].max()
disaster_risk_index_df['Disaster Frequency (Normalized)'] = ((disaster_risk_index_df['Disaster Frequency'] - df_min) / (df_max - df_min)) * 100

# 5. HUMAN IMPACT (POPULATION WEIGHTED)
emdat_df_exploded['Region_Pop'] = emdat_df_exploded['PH Region'].map(population_df['Average Population'])
total_pop_per_disaster = emdat_df_exploded.groupby(level=0)['Region_Pop'].transform('sum')
emdat_df_exploded['Human_Share'] = emdat_df_exploded['Region_Pop'] / total_pop_per_disaster

emdat_df_exploded['Weighted Affected'] = emdat_df_exploded['Human_Share'] * emdat_df_exploded['Total Affected']
disaster_risk_index_df['Total Affected'] = emdat_df_exploded.groupby('PH Region')['Weighted Affected'].sum()
disaster_risk_index_df['Average Population'] = population_df['Average Population']
disaster_risk_index_df['Human Impact'] = disaster_risk_index_df['Total Affected'] / disaster_risk_index_df['Average Population']

hi_min, hi_max = disaster_risk_index_df['Human Impact'].min(), disaster_risk_index_df['Human Impact'].max()
disaster_risk_index_df['Human Impact (Normalized)'] = ((disaster_risk_index_df['Human Impact'] - hi_min) / (hi_max - hi_min)) * 100

# 6. ECONOMIC IMPACT (GDP WEIGHTED)
# Map GDP for splitting damage
emdat_df_exploded['Region_GDP_Val'] = emdat_df_exploded['PH Region'].map(gdp_df['Average GDP'])
total_gdp_per_disaster = emdat_df_exploded.groupby(level=0)['Region_GDP_Val'].transform('sum')
emdat_df_exploded['Economic_Share'] = emdat_df_exploded['Region_GDP_Val'] / total_gdp_per_disaster

# Apply Economic Share to Damage
emdat_df_exploded['Weighted Damage USD'] = emdat_df_exploded['Economic_Share'] * emdat_df_exploded["Total Damage, Adjusted ('000 US$)"]
weighted_damage_total_usd = emdat_df_exploded.groupby('PH Region')['Weighted Damage USD'].sum()

# Convert and Store
disaster_risk_index_df['Total Damage (Adjusted PHP)'] = (weighted_damage_total_usd * 1000) * USD_TO_PHP
disaster_risk_index_df['Average GDP'] = gdp_df['Average GDP']
disaster_risk_index_df['Economic Impact'] = disaster_risk_index_df['Total Damage (Adjusted PHP)'] / disaster_risk_index_df['Average GDP']

ei_min, ei_max = disaster_risk_index_df['Economic Impact'].min(), disaster_risk_index_df['Economic Impact'].max()
disaster_risk_index_df['Economic Impact (Normalized)'] = ((disaster_risk_index_df['Economic Impact'] - ei_min) / (ei_max - ei_min)) * 100

# 7. FINAL DISASTER RISK INDEX
disaster_risk_index_df['Disaster Risk Score'] = (
    (disaster_risk_index_df['Disaster Frequency (Normalized)'] ** 1/3) +
    (1/3 * disaster_risk_index_df['Human Impact (Normalized)'] ** 1/3) +
    (1/3 * disaster_risk_index_df['Economic Impact (Normalized)'] ** 1/3)
)

# 8. OUTPUT
pd.set_option('display.float_format', '{:.2f}'.format)
disaster_risk_index_df.to_csv('./datasets/clean/disaster_risk_index.csv', index='PH Region')
display(disaster_risk_index_df)

Unnamed: 0_level_0,Disaster Count,Disaster Frequency,Disaster Frequency (Normalized),Total Affected,Average Population,Human Impact,Human Impact (Normalized),Total Damage (Adjusted PHP),Average GDP,Economic Impact,Economic Impact (Normalized),Disaster Risk Score
PH Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Bangsamoro Autonomous Region in Muslim Mindanao,24,2.4,12.5,1657727.37,4305348.8,0.39,8.71,1724737068.72,280770554.0,6.14,0.74,5.22
Cordillera Administrative Region,36,3.6,62.5,1918905.6,1792257.3,1.07,66.1,36427336129.61,335675496.48,108.52,77.87,36.83
National Capital Region,23,2.3,8.33,7349941.48,13523820.5,0.54,21.97,74287916533.61,6333211338.33,11.73,4.95,5.77
Region I - Ilocos Region,39,3.9,75.0,5930111.93,5259669.1,1.13,70.85,85924994379.99,657600124.81,130.66,94.55,43.38
Region II - Cagayan Valley,45,4.5,100.0,4682848.65,3653605.2,1.28,83.76,57846158113.77,419497031.27,137.89,100.0,53.75
Region III - Central Luzon,39,3.9,75.0,10758019.55,12262436.3,0.88,49.91,42001765744.34,2175810167.0,19.3,10.65,31.73
Region IVA - CALABARZON,45,4.5,100.0,15188984.14,16003842.2,0.95,55.92,62682672729.68,2875277272.17,21.8,12.53,40.94
Region IVB - MIMAROPA,36,3.6,62.5,4024816.87,3177330.1,1.27,82.51,11865653701.81,392869888.97,30.2,18.86,32.1
Region IX - Zamboanga Peninsula,21,2.1,0.0,1110317.87,3951319.1,0.28,0.0,2235580552.33,432961781.85,5.16,0.0,0.0
Region V - Bicol,41,4.1,83.33,5680726.69,6051966.9,0.94,55.05,10523960790.36,576758546.83,18.25,9.86,34.99
