In [None]:
# Combine the P1 (exposure) and P2 (vulnerability) results into the final CCRI layer

In [None]:
# Mount Google Drive at /content/drive; the data directory used by this
# notebook lives underneath that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pandas as pd
import geopandas as gpd
import glob
import numpy as np

In [None]:
# Root data directory for every read and write in this notebook.
# Defaults to the shared Drive checkout
# (/content/drive/MyDrive/CCRI/ccri_repo/data); set the CCRI_DATA_DIR
# environment variable to point somewhere else without editing this cell,
# which is what actually avoids hard-coding paths between users.
data_dir = os.environ.get('CCRI_DATA_DIR', '/content/drive/MyDrive/CCRI/ccri_repo/data')
print(data_dir)

/content/drive/MyDrive/CCRI/ccri_repo/data


In [None]:
# Echo the resolved data directory as a sanity check before any file is read
data_dir

'/content/drive/MyDrive/CCRI/ccri_repo/data'

In [None]:
# Read in all the data we need


def _rescale_numeric_0_10(frame):
    """Min-max rescale every numeric column of ``frame`` to 0-10, in place.

    Non-numeric columns (e.g. 'iso3') are left untouched. A column whose
    values are all identical has max == min; it is mapped to 0 instead of
    the 0/0 = NaN the plain formula would give. Columns that are entirely
    NaN stay NaN.

    Returns the Index of column names that were rescaled.
    """
    numeric_cols = frame.select_dtypes(include='number').columns
    if len(numeric_cols) == 0:
        return numeric_cols
    col_min = frame[numeric_cols].min()
    col_span = frame[numeric_cols].max() - col_min
    # Replace a zero span by 1 so constant columns become 0 rather than NaN
    safe_span = col_span.where(col_span != 0, 1)
    frame[numeric_cols] = 10 * (frame[numeric_cols] - col_min) / safe_span
    return numeric_cols


# -- exposure files
p1_exposure_file = pd.read_csv('{}/CCRI_results_misc/Merged_Exposure_Data.csv'.format(data_dir))
# Rescale all numeric P1 exposure columns to 0-10
cols_to_rescale = _rescale_numeric_0_10(p1_exposure_file)

p2_exposure_file = pd.read_csv('{}/CCRI_results_misc/P2_Merged_Normalized_avg.csv'.format(data_dir))
p1p2_scores = pd.read_csv('{}/CCRI_results_misc/p1_p2_avg_ccri.csv'.format(data_dir))

# -- attribute files
wb_income = pd.read_csv('{}/misc/WB_INCOME.csv'.format(data_dir))
unicef_ro = pd.read_csv('{}/misc/UNICEF_PROG_REG_GLOBAL.csv'.format(data_dir))

# -- population files
# NOTE(review): childpop is only referenced by commented-out code in the
# cells visible here; confirm whether this read is still needed.
childpop = pd.read_csv('{}/CCRI_results_misc/child_pop_sum_adm0.csv'.format(data_dir))
#worldpop = pd.read_csv('{}/Misc/World_Population_ByAOI_adm0.csv'.format(data_dir))

# -- boundary file
adm0 = gpd.read_file('{}/misc/adm0_boundaries_simple.geojson'.format(data_dir))

# -- fragile codes
fragile = pd.read_csv('{}/misc/List of fragile context (2025).csv'.format(data_dir))

# -- component vals
p1_components = pd.read_csv('{}/CCRI_results_misc/p1_group_mean.csv'.format(data_dir))
# Rescale all numeric P1 component columns to 0-10 (same rule as above)
cols_to_rescale = _rescale_numeric_0_10(p1_components)

p2_components = pd.read_csv('{}/CCRI_results_misc/p2_group_mean.csv'.format(data_dir))

In [None]:
# Identifier and derived-score columns that must not count towards missingness
exclude_cols = ['iso3', 'P2_arithmetic_avg', 'rank_reverse']

# Everything else in the P2 table is checked for missing values
cols_to_check = list(p2_exposure_file.columns.drop(exclude_cols, errors='ignore'))

# Share of checked columns that are NaN, per row (country), as a percentage
indicator_is_missing = p2_exposure_file[cols_to_check].isna()
p2_exposure_file['P2_missing_val'] = indicator_is_missing.mean(axis=1) * 100

# Preview the result
print(p2_exposure_file[['iso3', 'P2_missing_val']].head())


  iso3  P2_missing_val
0  AFG       11.764706
1  ALB       11.764706
2  DZA       11.764706
3  AND       58.823529
4  AGO        5.882353


In [None]:
# Relabel the rescaled P1 columns: *_absolute -> *_abs_norm, *_relative -> *_rel_norm
p1_exposure_file.columns = (
    p1_exposure_file.columns
    .str.replace('_absolute', '_abs_norm', regex=False)
    .str.replace('_relative', '_rel_norm', regex=False)
)

In [None]:
# Join P1 exposure, P2 exposure and the combined scores on iso3,
# keeping every row of the P1 table (left joins)
merged_P = pd.merge(p1_exposure_file, p2_exposure_file, how='left', on='iso3')
all_P = pd.merge(merged_P, p1p2_scores, how='left', on='iso3')

In [None]:
# Both merged tables carried P2_arithmetic_avg and rank_reverse, so pandas
# suffixed them _x/_y. Keep one copy of each and restore the plain names.
all_P = (
    all_P
    .drop(columns=['P2_arithmetic_avg_y', 'rank_reverse_x'])
    .rename(columns={
        'P2_arithmetic_avg_x': 'P2_arithmetic_avg',
        'rank_reverse_y': 'rank_reverse',
    })
)

In [None]:
# Add WB income
# Keep only the code and the ISO3 key, and strip the 'WB_' prefix from the code.
# expand=False makes str.extract return a Series (not a one-column DataFrame),
# and assign() builds a new frame instead of writing into a column-subset
# slice, so the result does not depend on frame alignment or copy semantics.
# Codes that do not contain 'WB_' become NaN.
wb_income = (
    wb_income[['Region_Code', 'ISO3Code']]
    .assign(Region_Code=lambda d: d['Region_Code'].str.extract(r'WB_(.*)', expand=False))
)
# Left join so every country in all_P is kept; the income code is exposed as 'wb_income'
df = (
    all_P
    .merge(wb_income, left_on='iso3', right_on='ISO3Code', how='left')
    .drop(columns='ISO3Code')
    .rename(columns={'Region_Code': 'wb_income'})
)

In [None]:
# Add Regional Office
# Keep only the code and the ISO3 key, and strip the 'UNICEF_' prefix from the code.
# expand=False makes str.extract return a Series (not a one-column DataFrame),
# and assign() builds a new frame instead of writing into a column-subset
# slice, so the result does not depend on frame alignment or copy semantics.
# Codes that do not contain 'UNICEF_' become NaN.
unicef_ro = (
    unicef_ro[['Region_Code', 'ISO3Code']]
    .assign(Region_Code=lambda d: d['Region_Code'].str.extract(r'UNICEF_(.*)', expand=False))
)
# Left join so every country is kept; the office code is exposed as 'unicef_ro'
df = (
    df
    .merge(unicef_ro, left_on='iso3', right_on='ISO3Code', how='left')
    .drop(columns='ISO3Code')
    .rename(columns={'Region_Code': 'unicef_ro'})
)

In [None]:
# Add population data
# -- Population totals are taken from the P1/P2 average geojson and averaged per ISO3
gdf = gpd.read_file('{}/CCRI_results_misc/p1_p2_avg_ccri.geojson'.format(data_dir))
population_subset = gdf[['ISO3', 'child_population_total', 'population_total']]
df_grouped = population_subset.groupby('ISO3', as_index=False).mean()

# Attach to the attribute table; child_population_total becomes u18_pop and the
# duplicate upper-case key from the right-hand side is dropped again
df_w_childpop = df.merge(df_grouped, left_on=['iso3'], right_on=['ISO3'], how='left')
df_w_childpop = df_w_childpop.rename(columns={'child_population_total': 'u18_pop'})
df_w_childpop = df_w_childpop.drop(columns=['ISO3'])

#df_w_childpop = (df.merge(childpop, left_on='iso3', right_on='ISO3', how='left').rename(columns={'child_population': 'u18_pop'})).drop(columns=['child_population_gpw'])
#df_w_childpop['u18_pop'] = df_w_childpop['u18_pop'].astype(int)

In [None]:
# Switch the key column to upper-case 'ISO3', the name the adm0 boundary
# table uses in the merge that follows.
df_w_childpop = df_w_childpop.rename(columns={'iso3':'ISO3'})

In [None]:
# Disabled step kept for reference: the code below is wrapped in a string
# literal, so it is never executed (the notebook only echoes the text back).
'''
# Using World_Population_ByAOI_adm0.csv file -> removing for now to use the population data from the exposure files to keep consistent
df_w_allpop = (df_w_childpop.merge(worldpop, left_on='name', right_on='adm0_name', how='left').rename(columns={'sum': 'total_pop'})).drop(columns=['iso3','adm0_name'])

# Set population data as integer
df_w_allpop['u18_pop'] = df_w_allpop['u18_pop'].astype(int)
df_w_allpop['total_pop'] = df_w_allpop['total_pop'].astype(int)
'''

"\n# Using World_Population_ByAOI_adm0.csv file -> removing for now to use the population data from the exposure files to keep consistent\ndf_w_allpop = (df_w_childpop.merge(worldpop, left_on='name', right_on='adm0_name', how='left').rename(columns={'sum': 'total_pop'})).drop(columns=['iso3','adm0_name'])\n\n# Set population data as integer\ndf_w_allpop['u18_pop'] = df_w_allpop['u18_pop'].astype(int)\ndf_w_allpop['total_pop'] = df_w_allpop['total_pop'].astype(int)\n"

In [None]:
# Attach the simplified adm0 geometry and its identifiers.
# Rows are matched on ISO3 plus the country name (adm0_name on the left, name on the right).
adm0 = adm0.loc[:, ['ISO3', 'name', 'ucode', 'uuid', 'geometry', 'type']]
df_combined = pd.merge(
    df_w_childpop,
    adm0,
    how='left',
    left_on=['ISO3', 'adm0_name'],
    right_on=['ISO3', 'name'],
)

In [None]:
# Grabbing actual exposure numbers
# Define file paths
exposure_path = "{}/p1_exposure".format(data_dir)

# Get all CSV files for exposure; sorted so the processing order (and which
# file's shared columns win later on) is the same on every run
exposure_files = sorted(glob.glob(os.path.join(exposure_path, "*.csv")))

# One processed frame per hazard file
exposure_data_list = []

# Process each file in one loop
for file in exposure_files:
    df = pd.read_csv(file)  # Read full file to check available columns
    filename_only = os.path.basename(file)
    # Hazard name = first two underscore-separated tokens of the file name
    hazard_name = '_'.join(filename_only.split('_')[:2])

    # Every column the computation below reads must be present;
    # child_population_total is the denominator of the relative exposure
    required_cols = {'iso3', 'adm0_name', 'child_population_exposed', 'child_population_total'}
    if not required_cols.issubset(df.columns):
        print(f"Skipping {file}: Missing columns {required_cols - set(df.columns)}")
        continue  # Skip if required columns are missing

    # Rows without an exposed-children count carry no information for this hazard
    df.dropna(subset=['child_population_exposed'], inplace=True)

    # Relative exposure (%): exposed / total children, 0 where the total is missing or not positive
    df['{}_rel'.format(hazard_name)] = np.where(
        (df['child_population_total'] > 0) & (~df['child_population_total'].isna()),
        (df['child_population_exposed'] / df['child_population_total']) * 100,
        0
    )

    # Absolute exposure takes the hazard-specific name
    df = df.rename(columns={'child_population_exposed': '{}_abs'.format(hazard_name)})
    # Population totals are not carried per hazard; population_total is
    # optional in the input, so a file without it is not an error
    df = df.drop(columns=['child_population_total', 'population_total'], errors='ignore')

    exposure_data_list.append(df)

In [None]:
# Combine the per-hazard frames into one row per (iso3, adm0_name).
# The frames are joined on the country keys rather than concatenated side by
# side: each frame has had different rows dropped, so positional alignment
# can attach a hazard's values to the wrong country or leave the key columns
# empty for countries missing from the first file.
key_cols = ['iso3', 'adm0_name']
merged_exposure_df = None
for hazard_df in exposure_data_list:
    hazard_df = hazard_df.drop_duplicates(subset=key_cols)
    if merged_exposure_df is None:
        merged_exposure_df = hazard_df
        continue
    # Columns shared between files are taken from the first file that has
    # them; only columns not seen before are added
    new_cols = [c for c in hazard_df.columns if c not in merged_exposure_df.columns]
    merged_exposure_df = merged_exposure_df.merge(
        hazard_df[key_cols + new_cols], on=key_cols, how='outer'
    )

if merged_exposure_df is None:
    raise ValueError('No exposure files were loaded; exposure_data_list is empty')

# Ensure no duplicate columns before merging
merged_exposure_df = merged_exposure_df.loc[:, ~merged_exposure_df.columns.duplicated()]

In [None]:
# Drop the exposure files' 'type' column; df_combined already has a 'type'
# column from the adm0 boundary table, which is merged with this frame next.
merged_exposure_df = merged_exposure_df.drop(columns=['type'])

In [None]:
# Attach the raw exposure numbers; keys differ in case between the two tables,
# so the right-hand 'iso3' column is kept in the result alongside 'ISO3'
df_combined = pd.merge(
    df_combined,
    merged_exposure_df,
    how='left',
    left_on=['ISO3', 'adm0_name'],
    right_on=['iso3', 'adm0_name'],
)

In [None]:
# Give the total population column (averaged per ISO3 from the
# p1_p2_avg_ccri geojson) its output name.
df_combined = df_combined.rename(columns={'population_total': 'total_pop'})

In [None]:
# Column inventory after the exposure merge
df_combined.columns.tolist()

['ISO3',
 'adm0_name',
 'air_pollution_abs_norm',
 'air_pollution_rel_norm',
 'agricultural_drought_abs_norm',
 'agricultural_drought_rel_norm',
 'drought_spi_abs_norm',
 'drought_spi_rel_norm',
 'drought_spei_abs_norm',
 'drought_spei_rel_norm',
 'coastal_flood_abs_norm',
 'coastal_flood_rel_norm',
 'extreme_heat_abs_norm',
 'extreme_heat_rel_norm',
 'fire_frequency_abs_norm',
 'fire_frequency_rel_norm',
 'fire_FRP_abs_norm',
 'fire_FRP_rel_norm',
 'heatwave_duration_abs_norm',
 'heatwave_duration_rel_norm',
 'heatwave_frequency_abs_norm',
 'heatwave_frequency_rel_norm',
 'heatwave_severity_abs_norm',
 'heatwave_severity_rel_norm',
 'river_flood_abs_norm',
 'river_flood_rel_norm',
 'tropical_storm_abs_norm',
 'tropical_storm_rel_norm',
 'sand_dust_abs_norm',
 'sand_dust_rel_norm',
 'vectorborne_malariapf_abs_norm',
 'vectorborne_malariapf_rel_norm',
 'vectorborne_malariapv_abs_norm',
 'vectorborne_malariapv_rel_norm',
 'P2_electricity_access_value_normalized',
 'P2_DTP1_access_value_n

In [None]:
# Multi-hazard count exposure (mhc_*): one '*topics.csv' file per suffix
multi_hazards_files = sorted(glob.glob(os.path.join(data_dir, 'CCRI_results_misc', '*topics.csv')))
if not multi_hazards_files:
    # Fail here with a clear message instead of leaving all_hazards as None
    raise FileNotFoundError('No *topics.csv files found under {}/CCRI_results_misc'.format(data_dir))

# One u18_pop value per ISO3, so the lookup merge below can never duplicate rows
u18_lookup = df_combined[['ISO3', 'u18_pop']].drop_duplicates(subset='ISO3')

all_hazards = None

for file in multi_hazards_files:
    # os.path.basename (as used for the P1 files) instead of splitting on '/'
    suffix = os.path.basename(file).replace('child_pop_exposed_', '').replace('_topics.csv', '')
    abs_col = f'mhc_{suffix}_abs'
    rel_col = f'mhc_{suffix}_rel'

    df = pd.read_csv(file, usecols=['ISO3', 'pop_exposed']).rename(columns={'pop_exposed': abs_col})

    # Ensure ISO3 is clean
    df = df.dropna(subset=['ISO3'])
    df['ISO3'] = df['ISO3'].astype(str).str.strip()
    df = df.drop_duplicates(subset='ISO3')

    # Bring in u18_pop as the denominator for the relative value
    df = df.merge(u18_lookup, on='ISO3', how='left')
    # A child population of exactly 0 gives 0 % (same convention as the
    # per-hazard *_rel columns) instead of inf clipped to 100 %.
    # An unknown (NaN) population still yields NaN.
    has_children = df['u18_pop'] != 0
    df[rel_col] = (df[abs_col] / df['u18_pop'] * 100).where(has_children, 0)
    df[rel_col] = df[rel_col].clip(upper=100)

    # Drop u18_pop to avoid repeated columns in merge
    df = df.drop(columns=['u18_pop'])

    if all_hazards is None:
        all_hazards = df
    else:
        all_hazards = all_hazards.merge(df, on='ISO3', how='outer')



In [None]:
# Attach the multi-hazard count columns (mhc_*) by ISO3
df_combined = pd.merge(df_combined, all_hazards, how='left', on=['ISO3'])

In [None]:
# Multi-hazard intensity exposure (mhi_TH*): one file per percentile threshold
multi_hazards_files = sorted(glob.glob(os.path.join(data_dir, 'CCRI_results_misc', 'Multi_Hazard_intensity_Exposure_adm0_TH_Percentile*.csv')))
if not multi_hazards_files:
    # Fail here with a clear message instead of leaving all_hazards as None
    raise FileNotFoundError('No Multi_Hazard_intensity_Exposure_adm0_TH_Percentile*.csv files found under {}/CCRI_results_misc'.format(data_dir))

# One u18_pop value per ISO3, so the lookup merge below can never duplicate rows
u18_lookup = df_combined[['ISO3', 'u18_pop']].drop_duplicates(subset='ISO3')

all_hazards = None

for file in multi_hazards_files:
    # os.path.basename (as used for the P1 files) instead of splitting on '/'
    suffix = os.path.basename(file).replace('Multi_Hazard_intensity_Exposure_adm0_TH_Percentile', '').replace('.csv', '')
    abs_col = f'mhi_TH{suffix}_abs'
    rel_col = f'mhi_TH{suffix}_rel'

    df = pd.read_csv(file, usecols=['ISO3', 'child_population_exposed']).rename(columns={'child_population_exposed': abs_col})

    # Clean ISO3
    df = df.dropna(subset=['ISO3'])
    df['ISO3'] = df['ISO3'].astype(str).str.strip()
    df = df.drop_duplicates(subset='ISO3')

    # Bring in u18_pop as the denominator for the relative value
    df = df.merge(u18_lookup, on='ISO3', how='left')
    # A child population of exactly 0 gives 0 % (same convention as the
    # per-hazard *_rel columns) instead of inf clipped to 100 %.
    # An unknown (NaN) population still yields NaN.
    has_children = df['u18_pop'] != 0
    df[rel_col] = (df[abs_col] / df['u18_pop'] * 100).where(has_children, 0)
    df[rel_col] = df[rel_col].clip(upper=100)

    # Drop to avoid duplication
    df = df.drop(columns=['u18_pop'])

    if all_hazards is None:
        all_hazards = df
    else:
        all_hazards = all_hazards.merge(df, on='ISO3', how='outer')



In [None]:
# Attach the multi-hazard intensity columns (mhi_TH*) by ISO3
df_combined = pd.merge(df_combined, all_hazards, how='left', on=['ISO3'])

In [None]:
# --- Adding P2 data
vul_path = "{}/p2_vulnerability".format(data_dir)
total_population_file = "{}/p1_exposure/agricultural_drought_fao_1984-2023_exposure_adm0.csv".format(data_dir)

# Child population totals, summed so each (ISO3, adm0_name) pair appears once
total_pop_df = (
    pd.read_csv(total_population_file, usecols=['iso3', 'adm0_name', 'child_population_total'])
    .rename(columns={'iso3': 'ISO3'})
    .groupby(['ISO3', 'adm0_name'], as_index=False)
    .agg({'child_population_total': 'sum'})
)

# All CSV files in the P2 vulnerability folder
p2_vul_files = glob.glob(os.path.join(vul_path, "*.csv"))

# One processed frame per indicator file
vul_data_list = []

for file in p2_vul_files:
    df = pd.read_csv(file)
    # Files without both key and value columns cannot be used
    if not {'iso3', 'value'}.issubset(df.columns):
        continue

    # Keep complete (iso3, value) rows only and switch to the upper-case key
    df = df[['iso3', 'value']].dropna().rename(columns={'iso3': 'ISO3'}, errors='ignore')

    # Indicator name = file name up to the first '.csv'
    filename_only = os.path.basename(file)
    hazard_name = filename_only.split('.csv')[0]
    if hazard_name == 'P2_Child_Mortality':
        continue

    # Bring in adm0_name and the child population total for each ISO3
    df = df.merge(total_pop_df, on='ISO3', how='left')

    # Raw indicator value, set to 0 where the child population is missing or not positive
    has_child_pop = df['child_population_total'].notna() & (df['child_population_total'] > 0)
    df[hazard_name] = np.where(has_child_pop, df['value'], 0)

    # Leave only the keys and the indicator column
    df = df.drop(columns=['child_population_total', 'time_period', 'data_source', 'value'], errors='ignore')

    vul_data_list.append(df)

In [None]:
# Attach each P2 indicator frame in turn, matched on ISO3 and adm0_name
for vul_df in vul_data_list:
    df_combined = pd.merge(df_combined, vul_df, how='left', on=['ISO3', 'adm0_name'])

In [None]:
# Add fragile
# Every country listed in the fragile-context file gets the label 'fragile';
# countries not in the list end up with NaN after the left join
fragile = fragile.assign(fragile='fragile')
fragile_flags = fragile[['ISO3', 'fragile']]
df_combined = pd.merge(df_combined, fragile_flags, how='left', on=['ISO3'])

In [None]:
# Remove the lower-case 'iso3' column left behind as the right-hand key of
# the exposure merge; 'ISO3' remains the key column.
df_combined = df_combined.drop(columns=['iso3'])

In [None]:
# Add components
# Short output names for the P1 (hazard group) component scores
p1_component_names = {
    'river_flood_gmean': 'P1_rfl',
    'coastal_flood_gmean': 'P1_cfl',
    'storm_gmean': 'P1_ts',
    'drought_gmean': 'P1_dr',
    'heatwave_gmean': 'P1_hw',
    'extreme_heat_gmean': 'P1_ext',
    'fire_gmean': 'P1_fr',
    'sand_dust_gmean': 'P1_sds',
    'air_pollution_gmean': 'P1_pm25',
    'malaria_gmean': 'P1_mal',
}
# Short output names for the P2 component scores
p2_component_names = {
    'health': 'P2_hea',
    'nutrition': 'P2_nut',
    'education': 'P2_edu',
    'wash': 'P2_wash',
    'protection': 'P2_pro',
    'poverty': 'P2_pov',
    'survival': 'P2_sur',
}

p1_components = p1_components.rename(columns=p1_component_names)
p2_components = p2_components.rename(columns=p2_component_names)

In [None]:
# Attach the P1 component scores, matched on ISO3/iso3 and adm0_name
df_combined = pd.merge(
    df_combined,
    p1_components,
    how='left',
    left_on=['ISO3', 'adm0_name'],
    right_on=['iso3', 'adm0_name'],
)

In [None]:
# Attach the P2 component scores, matched on ISO3/iso3 only
df_combined = pd.merge(
    df_combined,
    p2_components,
    how='left',
    left_on=['ISO3'],
    right_on=['iso3'],
)

In [None]:
# adm0_name has served as a merge key for the last time; the boundary table's
# 'name' column is the one kept (it is renamed to adm_name in the next step).
df_combined = df_combined.drop(columns=['adm0_name'])

In [None]:
# Remove any columns we don't want and rearrange as well
# -- Dropping
# Every column whose name contains 'max' or 'min' is removed.
# NOTE(review): this is a substring match, so any column that merely
# contains those letters would be dropped as well.
min_max_cols = df_combined.filter(regex='max|min').columns
df_combined = df_combined.drop(columns=min_max_cols)

# -- Renaming
output_names = {'name': 'adm_name', 'P1_P2_geometric_avg': 'ccri', 'ISO3': 'iso3'}
df_combined = df_combined.rename(columns=output_names)

In [None]:
# Set to 2 decimal places
# Columns are chosen by dtype rather than by the Python type of the first
# row's value: an object column whose first entry is NaN is no longer
# treated as numeric, and an empty frame no longer raises on .iloc[0].
skip_cols = ['wb_income', 'unicef_ro', 'geometry', 'fragile']
numeric_cols = [
    col for col in df_combined.select_dtypes(include='number').columns
    if col not in skip_cols
]
if numeric_cols:
    df_combined[numeric_cols] = df_combined[numeric_cols].round(2)

In [None]:
# Shorten the '_normalized' suffix to '_norm' in every column name
df_combined.columns = df_combined.columns.str.replace('_normalized', '_norm', regex=False)

In [None]:
# Renaming the hazards
# Each pair is (substring to find, short replacement). The pairs are applied
# to every column name in the order listed.

# P1 hazards
p1_short_names = [
    ('river_flood', 'rfl'),
    ('coastal_flood', 'cfl'),
    ('tropical_storm', 'ts'),
    ('agricultural_drought', 'agdr'),
    ('drought_spei', 'metdr_spei'),
    ('drought_spi', 'metdr_spi'),
    ('heatwave_frequency', 'hw_fre'),
    ('heatwave_severity', 'hw_sev'),
    ('heatwave_duration', 'hw_dur'),
    ('extreme_heat', 'ext'),
    ('fire_frequency', 'fr_fre'),
    ('fire_FRP', 'fr_int'),
    ('sand_dust', 'sds'),
    ('vectorborne_malariapv', 'mal_pv'),
    ('vectorborne_malariapf', 'mal_pf'),
    ('air_pollution', 'pm25'),
]

# P2 hazards
# NOTE(review): two different source names map to 'wash_san'; if both are
# present in the table the result has duplicate column names - confirm.
p2_short_names = [
    ('P2_DTP1_access', 'hea_dtp1'),
    ('P2_DTP3_access', 'hea_dtp3'),
    ('P2_Skilled_birth_coverage', 'hea_skat'),
    ('P2_electricity_access', 'hea_elec'),
    ('P2_Stunting', 'nut_stu'),
    ('P2_Child_Food_Poverty', 'nut_fpov'),
    ('P2_At-least_basic_drinking_water', 'wash_wat'),
    ('P2_WASH_Drinking_Sanitation', 'wash_san'),
    ('P2_At-least_basic_sanitation', 'wash_san'),
    ('P2_Basic_hygiene', 'wash_hyg'),
    ('P2_Lower_secondary_out_of_school', 'edu_lsos'),
    ('P2_Lower_secondary_completion_rate', 'edu_lscr'),
    ('P2_Learning_poverty', 'edu_lpov'),
    ('P2_Child_labor', 'pro_lab'),
    ('P2_Child_marriage', 'pro_mar'),
    ('P2_Child_poverty', 'pov_md'),
    ('P2_Under_five_covered_by_social_protection', 'pov_u5sp'),
    ('P2_Under_five_mortality', 'sur_u5mor'),
]

for long_name, short_name in p1_short_names + p2_short_names:
    df_combined.columns = [col.replace(long_name, short_name) for col in df_combined.columns]


In [None]:
# Collapse the leftover 'value_norm' part of the P2 column names to 'norm'
df_combined.columns = df_combined.columns.str.replace('value_norm', 'norm', regex=False)

In [None]:
# Rename some things
# The same 'name' -> 'adm_name' and 'ISO3' -> 'iso3' renames were already
# applied in an earlier cell; rename() ignores keys that are not present, so
# this has no effect when that cell has run.
df_combined = df_combined.rename(columns={'name': 'adm_name', 'ISO3':'iso3'})


In [None]:
# Column inventory after all renames
df_combined.columns.tolist()

['iso3',
 'agdr_abs_norm',
 'agdr_rel_norm',
 'metdr_spei_abs_norm',
 'metdr_spei_rel_norm',
 'pm25_abs_norm',
 'pm25_rel_norm',
 'ext_abs_norm',
 'ext_rel_norm',
 'metdr_spi_abs_norm',
 'metdr_spi_rel_norm',
 'cfl_abs_norm',
 'cfl_rel_norm',
 'hw_sev_abs_norm',
 'hw_sev_rel_norm',
 'hw_fre_abs_norm',
 'hw_fre_rel_norm',
 'fr_fre_abs_norm',
 'fr_fre_rel_norm',
 'hw_dur_abs_norm',
 'hw_dur_rel_norm',
 'fr_int_abs_norm',
 'fr_int_rel_norm',
 'rfl_abs_norm',
 'rfl_rel_norm',
 'sds_abs_norm',
 'sds_rel_norm',
 'ts_abs_norm',
 'ts_rel_norm',
 'mal_pf_abs_norm',
 'mal_pf_rel_norm',
 'mal_pv_abs_norm',
 'mal_pv_rel_norm',
 'hea_elec_norm',
 'hea_dtp1_norm',
 'hea_dtp3_norm',
 'hea_skat_norm',
 'nut_stu_norm',
 'nut_fpov_norm',
 'edu_lsos_norm',
 'edu_lscr_norm',
 'edu_lpov_norm',
 'pro_lab_norm',
 'pro_mar_norm',
 'pov_md_norm',
 'pov_u5sp_norm',
 'sur_u5mor_norm',
 'wash_hyg_norm',
 'wash_wat_norm',
 'wash_san_norm',
 'P2_arithmetic_avg',
 'P2_missing_val',
 'P1_geometric_avg',
 'ccri',
 'ra

In [None]:
# Display the combined table for a visual check before the final column selection
df_combined

Unnamed: 0,iso3,agdr_abs_norm,agdr_rel_norm,metdr_spei_abs_norm,metdr_spei_rel_norm,pm25_abs_norm,pm25_rel_norm,ext_abs_norm,ext_rel_norm,metdr_spi_abs_norm,...,P1_pm25,P1_mal,iso3_y,P2_hea,P2_nut,P2_wash,P2_edu,P2_pro,P2_pov,P2_sur
0,AND,3.01,5.14,0.00,0.00,3.48,10.00,0.00,0.00,3.48,...,6.04,0.00,AND,0.00,,0.00,,,8.78,0.00
1,AGO,7.79,2.46,8.67,8.44,8.79,10.00,7.45,1.49,8.72,...,9.38,2.43,AGO,6.09,7.08,8.87,4.85,5.17,8.50,5.17
2,AFG,8.62,7.09,7.83,2.31,8.87,10.00,8.17,3.77,7.84,...,9.42,6.00,AFG,4.88,8.02,5.01,6.93,6.45,9.96,0.89
3,ATG,3.61,7.06,0.00,0.00,3.81,9.34,0.00,0.00,0.00,...,6.06,0.00,,,,,,,,
4,ARE,4.17,0.06,0.00,0.00,7.14,9.48,7.11,9.08,0.00,...,8.24,0.00,ARE,0.25,,0.00,2.41,,9.98,0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,YEM,7.23,1.07,6.54,0.36,8.76,9.65,7.91,2.90,7.22,...,9.19,4.28,YEM,7.11,6.85,5.22,,5.71,,0.76
191,VUT,3.42,0.70,4.34,2.77,4.63,4.15,0.00,0.00,4.35,...,4.39,5.21,VUT,3.33,6.12,4.82,,3.55,8.71,0.33
192,VNM,8.81,6.88,8.60,5.14,8.98,8.69,8.63,5.31,8.76,...,8.83,1.85,VNM,0.06,2.45,0.85,1.59,5.36,9.86,1.03
193,UGA,8.83,7.90,6.81,0.39,8.98,9.78,7.34,0.92,7.08,...,9.37,2.45,UGA,2.65,4.16,7.94,6.16,10.00,4.59,2.12


In [None]:
# Final column selection and order for the output layer.
# The list is assembled from its parts; the resulting order is:
# identifiers, P1 hazards, P2 indicators, component scores, summary scores,
# multi-hazard columns, ccri.

# Identifiers, population and attributes
id_cols = [
    'iso3', 'adm_name', 'total_pop', 'u18_pop', 'wb_income', 'unicef_ro',
    'ucode', 'uuid', 'geometry', 'type', 'fragile',
]

# P1 hazards: absolute, relative, then their normalised versions
p1_hazards = [
    'rfl', 'cfl', 'ts', 'agdr', 'metdr_spei', 'metdr_spi',
    'hw_fre', 'hw_dur', 'hw_sev', 'ext', 'fr_fre', 'fr_int',
    'sds', 'pm25', 'mal_pv', 'mal_pf',
]
p1_cols = [
    f'{hazard}_{kind}'
    for hazard in p1_hazards
    for kind in ('abs', 'rel', 'abs_norm', 'rel_norm')
]

# P2 indicators: raw value followed by its normalised version
p2_indicators = [
    'hea_dtp1', 'hea_dtp3', 'hea_skat', 'hea_elec',
    'nut_stu', 'nut_fpov',
    'wash_wat', 'wash_san', 'wash_hyg',
    'edu_lsos', 'edu_lscr', 'edu_lpov',
    'pro_lab', 'pro_mar',
    'pov_md', 'pov_u5sp',
    'sur_u5mor',
]
p2_cols = [
    f'{indicator}{suffix}'
    for indicator in p2_indicators
    for suffix in ('', '_norm')
]

# Component and summary scores
component_cols = [
    'P1_rfl', 'P1_cfl', 'P1_ts', 'P1_dr', 'P1_hw', 'P1_ext', 'P1_fr', 'P1_sds', 'P1_pm25', 'P1_mal',
    'P2_hea', 'P2_nut', 'P2_wash', 'P2_edu', 'P2_pro', 'P2_pov', 'P2_sur',
]
summary_cols = ['P1_geometric_avg', 'P2_arithmetic_avg', 'P2_missing_val']

# Multi-hazard columns: all absolute columns first, then all relative ones.
# Threshold order is kept as in the output layout (95 before 90);
# the ge9 and ge10 count columns are intentionally left out.
mhi_thresholds = [75, 80, 85, 95, 90]
mhc_counts = range(1, 9)
multi_hazard_cols = []
for kind in ('abs', 'rel'):
    multi_hazard_cols += [f'mhi_TH{threshold}_{kind}' for threshold in mhi_thresholds]
    multi_hazard_cols += [f'mhc_ge{count}_{kind}' for count in mhc_counts]

df_combined = df_combined[
    id_cols + p1_cols + p2_cols + component_cols + summary_cols + multi_hazard_cols + ['ccri']
]

In [None]:
# Multi-hazard absolute exposure columns that must not exceed the child population
exposure_cols = [
    'mhi_TH75_abs', 'mhi_TH80_abs', 'mhi_TH85_abs', 'mhi_TH95_abs', 'mhi_TH90_abs',
    'mhc_ge1_abs', 'mhc_ge2_abs', 'mhc_ge3_abs', 'mhc_ge4_abs', 'mhc_ge5_abs',
    'mhc_ge6_abs', 'mhc_ge7_abs', 'mhc_ge8_abs'#, 'mhc_ge9_abs', 'mhc_ge10_abs'
]

# Element-wise minimum of each exposure column and u18_pop
# (a NaN on either side stays NaN)
capped = {
    col: np.minimum(df_combined[col], df_combined['u18_pop'])
    for col in exposure_cols
}
df_combined = df_combined.assign(**capped)


In [None]:
# Exclude the row(s) with iso3 == 'NIC'
# NOTE(review): the reason for excluding NIC is not stated in the notebook - confirm.
df_combined = df_combined.loc[df_combined['iso3'].ne('NIC')]

In [None]:
# Store the raw absolute exposure counts (every '*abs*' column that is not a
# normalised score) as whole numbers.
# - Values are truncated towards zero, as a plain int cast would do.
# - The nullable 'Int64' dtype is used so countries with a missing value stay
#   missing instead of making the cast fail.
# - The columns are replaced (assign) rather than written through
#   .loc[:, col]; on pandas >= 2.0 the latter writes into the existing float
#   column and the integer dtype is lost.
abs_cols = [col for col in df_combined.columns if 'abs' in col and 'norm' not in col]
df_combined = df_combined.assign(
    **{col: np.trunc(df_combined[col]).astype('Int64') for col in abs_cols}
)


In [None]:
# Keep only rows that have a ccri value and whose iso3 is not 'PSE'
# (the iso3 comparison is case-insensitive)
ccri_missing = df_combined['ccri'].isna()
is_pse = df_combined['iso3'].str.upper() == 'PSE'
mask = ccri_missing | is_pse
df_combined = df_combined.loc[~mask]


In [None]:
# df_iso = pd.read_csv('/content/drive/MyDrive/CCRI/ccri_repo/data/p1_exposure/heatwave_frequency_ecmwf_2014-2024_exposure_adm0.csv')
# valid_iso = df_iso[(df_iso['type'] == 'State') & (df_iso['area'] > 20000)].iso3.unique()

# df_combined = df_combined[df_combined['iso3'].isin(valid_iso)]


In [None]:
# Store both population columns as whole numbers in the nullable 'Int64'
# dtype (missing values stay missing).
# The columns are replaced (assign) rather than written through
# .loc[:, col]; on pandas >= 2.0 the latter writes into the existing float
# column, so the Int64 conversion would be lost.
# Values are coerced to numeric first, then rounded, so the cast to Int64 is exact.
df_combined = df_combined.assign(
    **{
        pop_col: pd.to_numeric(df_combined[pop_col], errors='coerce').round(0).astype('Int64')
        for pop_col in ('total_pop', 'u18_pop')
    }
)

In [None]:
# Wrap the combined table as a GeoDataFrame, using its 'geometry' column and
# declaring the coordinates as EPSG:4326.
# Note: this rebinds `gdf`, which earlier held the p1_p2_avg_ccri geojson.
gdf = gpd.GeoDataFrame(df_combined, geometry=df_combined['geometry'], crs='EPSG:4326')

In [None]:
# Write the final layer to <data_dir>/CCRI_results_misc/CCRI_P1_P2_format.geojson.
# No driver is passed; presumably it is inferred from the .geojson extension - confirm.
gdf.to_file('{}/CCRI_results_misc/CCRI_P1_P2_format.geojson'.format(data_dir))