<a href="https://colab.research.google.com/github/dohyung-kim/ccri/blob/main/script/adm0/exposure_by_th_adm0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import geopandas as gpd
import os

In [4]:

# Summarise child population exposed to a high CCRI score at country (adm0) level.
#
# The cell derives seven cut-offs from the distribution of the CCRI score
# (mean, mean + 1 SD, mean + 2 SD and the 75th/90th/95th/99th percentiles),
# selects the ISO3 rows that lie strictly above each cut-off and totals their
# child population. Every intermediate result keeps its own module-level name
# so that other cells can keep referring to it.

data_dir = '/content/drive/MyDrive/CCRI_results_misc'

# Column names used throughout the cell, defined once to avoid typos.
score_col = 'P1_P2_geometric_avg'
pop_col = 'child_population'


def _rows_above(frame, threshold):
    """Return the rows of `frame` whose CCRI score is strictly greater than `threshold`.

    Rows whose score is NaN never satisfy the comparison and are dropped.
    """
    return frame[frame[score_col] > threshold]


def _print_threshold_report(cases):
    """Print the cut-off value, the matching rows and their population total for each case.

    `cases` is a sequence of (header, label, threshold, rows, total) tuples:
    `header` names the cut-off on its own line, `label` is the wording used
    inside the two descriptive sentences. Every case after the first is
    preceded by a blank line.
    """
    for position, (header, label, threshold, rows, total) in enumerate(cases):
        separator = "" if position == 0 else "\n"
        print(f"{separator}{header}: {threshold}")
        print(f"ISO3 with higher than {label}:")
        print(rows[['ISO3', score_col, pop_col]])
        print(f"Total child population for ISO3 with higher than {label}: {total}")


# Step 1: Read the CSV file containing the ccri_score
ccri_score = pd.read_csv(os.path.join(data_dir, 'p1_p2_avg_ccri.csv'))

# Step 2: Read the CSV file for child population data
child_pop_sum = pd.read_csv(os.path.join(data_dir, 'child_pop_sum_adm0.csv'))

# Step 3: Collapse the child population data to one row per 'ISO3'
child_pop_sum_grouped = child_pop_sum.groupby('ISO3', as_index=False)[pop_col].sum()

# Step 4: Attach the population totals to the scores. The left join keeps every
# score row; rows without a population match get NaN, which the sums below skip.
ccri_score_pop = ccri_score.merge(child_pop_sum_grouped, on='ISO3', how='left')

# Step 5: Extract the ccri values and calculate statistics (NaN scores are ignored)
ccri_values = ccri_score_pop[score_col].values

mean_ccri = np.nanmean(ccri_values)
std_ccri = np.nanstd(ccri_values)  # NumPy default ddof=0, i.e. population SD

mean_plus_1sd_ccri = mean_ccri + std_ccri
mean_plus_2sd_ccri = mean_ccri + 2 * std_ccri

percentile_75_ccri = np.nanpercentile(ccri_values, 75)
percentile_90_ccri = np.nanpercentile(ccri_values, 90)
percentile_95_ccri = np.nanpercentile(ccri_values, 95)
percentile_99_ccri = np.nanpercentile(ccri_values, 99)

# Step 6: Identify ISO3 values with higher ccri_score than each threshold
higher_than_mean = _rows_above(ccri_score_pop, mean_ccri)
higher_than_mean_plus_1sd = _rows_above(ccri_score_pop, mean_plus_1sd_ccri)
higher_than_mean_plus_2sd = _rows_above(ccri_score_pop, mean_plus_2sd_ccri)
higher_than_75 = _rows_above(ccri_score_pop, percentile_75_ccri)
higher_than_90 = _rows_above(ccri_score_pop, percentile_90_ccri)
higher_than_95 = _rows_above(ccri_score_pop, percentile_95_ccri)
higher_than_99 = _rows_above(ccri_score_pop, percentile_99_ccri)

# Step 7: Calculate sum of child population for each case
sum_higher_than_mean = higher_than_mean[pop_col].sum()
sum_higher_than_mean_plus_1sd = higher_than_mean_plus_1sd[pop_col].sum()
sum_higher_than_mean_plus_2sd = higher_than_mean_plus_2sd[pop_col].sum()
sum_higher_than_75 = higher_than_75[pop_col].sum()
sum_higher_than_90 = higher_than_90[pop_col].sum()
sum_higher_than_95 = higher_than_95[pop_col].sum()
sum_higher_than_99 = higher_than_99[pop_col].sum()

# Print the results including sums, one stanza per threshold
_print_threshold_report([
    ("Mean", "Mean",
     mean_ccri, higher_than_mean, sum_higher_than_mean),
    ("Mean + 1 Standard Deviation", "Mean + 1 SD",
     mean_plus_1sd_ccri, higher_than_mean_plus_1sd, sum_higher_than_mean_plus_1sd),
    ("Mean + 2 Standard Deviations", "Mean + 2 SD",
     mean_plus_2sd_ccri, higher_than_mean_plus_2sd, sum_higher_than_mean_plus_2sd),
    ("75th Percentile", "75th percentile",
     percentile_75_ccri, higher_than_75, sum_higher_than_75),
    ("90th Percentile", "90th percentile",
     percentile_90_ccri, higher_than_90, sum_higher_than_90),
    ("95th Percentile", "95th percentile",
     percentile_95_ccri, higher_than_95, sum_higher_than_95),
    ("99th Percentile", "99th percentile",
     percentile_99_ccri, higher_than_99, sum_higher_than_99),
])


Mean: 3.193700120922024
ISO3 with higher than Mean:
    ISO3  P1_P2_geometric_avg  child_population
1    AFG             9.977203      2.094996e+07
2    AGO             6.308087      1.897740e+07
5    ALB             4.092640      5.517687e+05
7    ARE             6.167410      1.982480e+06
8    ARG             6.514191      1.197686e+07
..   ...                  ...               ...
241  VNM             8.846483      2.776173e+07
245  YEM             7.048117      1.890482e+07
246  ZAF             7.397318      1.967573e+07
247  ZMB             4.744616      1.025336e+07
248  ZWE             7.974130      7.950134e+06

[92 rows x 3 columns]
Total child population for ISO3 with higher than Mean: 2125254662.012865

Mean + 1 Standard Deviation: 6.327020351057811
ISO3 with higher than Mean + 1 SD:
    ISO3  P1_P2_geometric_avg  child_population
1    AFG             9.977203      2.094996e+07
8    ARG             6.514191      1.197686e+07
19   BEN             8.166011      6.912235e+06
2