### Load libraries

In [116]:
# Load libraries
import pandas as pd
import numpy as np
# The plotting API (plt.plot, plt.subplots, ...) lives in the pyplot submodule;
# aliasing the top-level matplotlib package as `plt` would make those calls fail.
import matplotlib.pyplot as plt
import os
import seaborn as sns
import geopandas as gpd

### Read data

In [117]:
# Load the nationwide ICER inputs (one row per treatment arm) from the raw Excel workbook
icer = pd.read_excel(io="../data/raw/ICER_data.xlsx")

In [118]:
# Load the life-expectancy tables (national and district level) and the district shapefile
le_nat = pd.read_csv(filepath_or_buffer="../data/processed/le_calc_national.csv")
le_district = pd.read_csv(filepath_or_buffer="../data/processed/le_combined.csv")
df_map = gpd.read_file("../data/processed/high_res.shp")

In [120]:
# The first column is loaded under the placeholder label "Unnamed: 0";
# give it a meaningful name (it holds the treatment arm).
icer = icer.rename({"Unnamed: 0": "Treatment"}, axis="columns")

In [121]:
# Preview the ICER table to confirm the first column is now called "Treatment"
icer.head()

Unnamed: 0,Treatment,burden,qaly_lost,healthcare_cost,payer_cost
0,non_vaccine,382416,184088.54,26618814,16066984
1,vaccine,264217,127189.68,352828922,345497041


### Calculate district-level ICER from the nationwide average ICER for Indonesia (2020–2024, taken from a research paper). The national figures are adjusted by life expectancy to obtain district-level values, assuming intervention costs remain approximately the same across districts; life expectancy is used to adjust the QALY component. All prices are adjusted to 2018 USD based on PPP and discounted at 3%.

In [122]:
# Preview the district-level life expectancy table (one row per district and year)
le_district.head()

Unnamed: 0,District,Year,Life Expectancy
0,ACEH,2022,70.18
1,SIMEULUE,2022,65.48
2,ACEH SINGKIL,2022,67.65
3,ACEH SELATAN,2022,64.64
4,ACEH TENGGARA,2022,68.48


In [123]:
# Give the district life expectancy its own column name so it stays distinguishable
# from the national "Life Expectancy" column once the two tables are merged.
le_district = le_district.rename({"Life Expectancy": "district_le"}, axis="columns")

In [124]:
# Preview the national life expectancy table (one row per year)
le_nat.head()

Unnamed: 0,Year,Life Expectancy
0,2019,69.213148
1,2020,69.363809
2,2021,69.474243
3,2022,69.747513
4,2023,70.018191


In [125]:
# Label the national life expectancy column as nat_le
le_nat = le_nat.rename({"Life Expectancy": "nat_le"}, axis="columns")

In [126]:
# Pair every district-year row with the national life expectancy of the same year.
# This is an inner join, so district rows whose year has no national value are dropped.
le_final = le_district.merge(le_nat, on="Year")

# TODO: missing values are not handled here; NA replacement was planned but never implemented.

# Force both life expectancy columns to float before dividing
for le_column in ("district_le", "nat_le"):
    le_final[le_column] = le_final[le_column].astype(float)

# le_factor = district life expectancy relative to the national value
# (above 1 means the district outlives the national average)
le_final["le_factor"] = le_final["district_le"].div(le_final["nat_le"])

In [127]:
# Join the life expectancy columns (district_le, nat_le, le_factor) onto the district map.
# Rows are matched on district name and year; the default inner join keeps only
# district-years present in both tables.
df = pd.merge(left=df_map, right=le_final, on=["District", "Year"])

In [128]:
# Only le_factor is needed from here on, so discard the two raw life expectancy columns
df_final = df.drop(["district_le", "nat_le"], axis="columns")

In [129]:
# Derive yearly costs per unit of burden from the nationwide aggregated data (2020-2024).
# Row 0 of `icer` is the non-vaccine arm and row 1 the vaccine arm.
icer_row_by_arm = {"non_vaccine": 0, "vaccine": 1}
years_covered = 5  # the aggregated totals span 2020-2024

# Nationwide burden of each arm, broadcast to every district row
for arm, icer_row in icer_row_by_arm.items():
    df_final[f"burden_{arm}"] = icer["burden"].iloc[icer_row]

# Total cost -> cost per unit of burden -> yearly average, for each cost type and arm
for cost_type in ("healthcare_cost", "payer_cost"):
    for arm, icer_row in icer_row_by_arm.items():
        df_final[f"{cost_type}_{arm}"] = (
            icer[cost_type].iloc[icer_row] / df_final[f"burden_{arm}"] / years_covered
        )



In [131]:
# Step 2: Province-level normalization factors
# Average urbanicity, population density and case counts over all rows of each province.
normalised_columns = ["urbanicity", "Pop_den", "Cases"]
mean_column_names = {
    "urbanicity": "mean_urbanicity",
    "Pop_den": "mean_pop_den",
    "Cases": "mean_cases",
}
province_means = df_final.groupby("Province")[normalised_columns].mean()
province_means = province_means.rename(columns=mean_column_names)

In [133]:
# Attach the province-level means to every district row.
# Mean columns left over from an earlier run of this cell are dropped first:
# without that, re-running the cell would create suffixed duplicates
# (e.g. mean_urbanicity_x / mean_urbanicity_y) and the scaling-factor cell
# would then fail with a KeyError. On a first run the drop is a no-op.
df_final = df_final.drop(columns=province_means.columns, errors="ignore").merge(
    province_means, on="Province", how="left"
)

In [134]:
# Step 3: Compute scaling factors
# Each factor is the district value divided by its province mean, so 1.0 means
# "equal to the province average". Case intensity is included as an optional
# extra scaling dimension alongside urbanicity and population density.
for factor_column, value_column, mean_column in (
    ("urbanicity_factor", "urbanicity", "mean_urbanicity"),
    ("pop_den_factor", "Pop_den", "mean_pop_den"),
    ("case_factor", "Cases", "mean_cases"),
):
    df_final[factor_column] = df_final[value_column].div(df_final[mean_column])

In [135]:
# Incremental yearly cost per district: the vaccine-minus-non-vaccine cost difference,
# scaled by the district's case, urbanicity and population-density factors.
incremental_cost_inputs = {
    "incremental_healthcare_cost": ("healthcare_cost_vaccine", "healthcare_cost_non_vaccine"),
    "incremental_payer_cost": ("payer_cost_vaccine", "payer_cost_non_vaccine"),
}
for incremental_column, (vaccine_column, non_vaccine_column) in incremental_cost_inputs.items():
    cost_difference = df_final[vaccine_column] - df_final[non_vaccine_column]
    df_final[incremental_column] = (
        cost_difference
        * df_final["case_factor"]
        * df_final["urbanicity_factor"]
        * df_final["pop_den_factor"]
    )

In [136]:
# Preview the result, including the scaling factors and incremental cost columns
df_final.head()

Unnamed: 0,District,Year,Area_sq_km,HDI,Province,Cases,Population,Pop_den,urbanicity,geometry,...,payer_cost_non_vaccine,payer_cost_vaccine,mean_urbanicity,mean_pop_den,mean_cases,urbanicity_factor,pop_den_factor,case_factor,incremental_healthcare_cost,incremental_payer_cost
0,BANDUNG,2020,1767.96,0.7239,Jawa Barat,2295,3623790,2.049701,0.123155,"POLYGON ((107.73309 -6.814, 107.73354 -6.81427...",...,8.402883,261.525217,0.268587,3.910546,946.796296,0.458529,0.524147,2.423964,147.479087,147.460803
1,BANDUNG,2021,1767.96,0.7273,Jawa Barat,2002,3665655,2.073381,0.123155,"POLYGON ((107.73309 -6.814, 107.73354 -6.81427...",...,8.402883,261.525217,0.268587,3.910546,946.796296,0.458529,0.530202,2.114499,130.13688,130.120746
2,BANDUNG,2022,1767.96,0.7316,Jawa Barat,4191,3707523,2.097063,0.123155,"POLYGON ((107.73309 -6.814, 107.73354 -6.81427...",...,8.402883,261.525217,0.268587,3.910546,946.796296,0.458529,0.536258,4.426507,275.541008,275.506847
3,BANDUNG,2023,1767.96,0.7374,Jawa Barat,1005,3749391,2.120744,0.123155,"POLYGON ((107.73309 -6.814, 107.73354 -6.81427...",...,8.402883,261.525217,0.268587,3.910546,946.796296,0.458529,0.542314,1.061474,66.820777,66.812493
4,BANDUNG BARAT,2020,1305.77,0.6808,Jawa Barat,966,1788336,1.369564,0.079001,"POLYGON ((107.40945 -6.68851, 107.40986 -6.688...",...,8.402883,261.525217,0.268587,3.910546,946.796296,0.294136,0.350223,1.020283,26.6071,26.603802


### Save the DataFrame for other ICER calculations

In [137]:
# Persist the district-level cost table for the downstream ICER notebooks.
# index=True is the pandas default: the row index is written as an unnamed first column.
df_final.to_csv(path_or_buf="../data/processed/cost_observed.csv", index=True)