### Load libraries

In [1]:
#load libraries
import pandas as pd
import numpy as np
import matplotlib as plt
import os
import seaborn as sns
import geopandas as gpd

### Read data

In [2]:
# Load the ICER input workbook from the raw-data folder (pandas defaults,
# i.e. the first sheet).
icer = pd.read_excel(io="../data/raw/ICER_data.xlsx")

In [3]:
# Life-expectancy tables: district level first, national level second.
le_district, le_nat = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/le_combined.csv",
        "../data/processed/le_calc_national.csv",
    )
)

# District shapefile, read with geopandas.
df_map = gpd.read_file("../data/processed/jkt_dist.shp")

In [4]:
# The first column has no header in the workbook, so pandas calls it
# "Unnamed: 0"; rename it to "Treatment".
treatment_label_fix = {"Unnamed: 0": "Treatment"}
icer = icer.rename(treatment_label_fix, axis="columns")

In [5]:
# Preview the ICER table (five rows is the `head` default).
icer.head(n=5)

Unnamed: 0,Treatment,burden,qaly_lost,healthcare_cost,payer_cost
0,non_vaccine,382416,184088.54,26618814,16066984
1,vaccine,264217,127189.68,352828922,345497041


### Calculate district-level ICER

The nationwide average ICER for Indonesia (2020-2024, taken from the research paper) is adjusted by life expectancy to obtain a district-level ICER, assuming that intervention costs remain approximately the same across districts; life expectancy is used to adjust the QALYs. All prices are adjusted to 2018 USD based on PPP and discounted at 3%.

In [6]:
# Preview the district-level life-expectancy table.
le_district.head(n=5)

Unnamed: 0,District,Year,Life Expectancy
0,ACEH,2022,70.18
1,SIMEULUE,2022,65.48
2,ACEH SINGKIL,2022,67.65
3,ACEH SELATAN,2022,64.64
4,ACEH TENGGARA,2022,68.48


In [7]:
# Rename the value column so it is identifiable as the district-level
# life expectancy.
district_le_name = {"Life Expectancy": "district_le"}
le_district = le_district.rename(district_le_name, axis="columns")

In [8]:
# Preview the national life-expectancy table.
le_nat.head(n=5)

Unnamed: 0,Year,Life Expectancy
0,2019,69.213148
1,2020,69.363809
2,2021,69.474243
3,2022,69.747513
4,2023,70.018191


In [9]:
# Rename the value column so it is identifiable as the national-level
# life expectancy.
national_le_name = {"Life Expectancy": "nat_le"}
le_nat = le_nat.rename(national_le_name, axis="columns")

In [10]:
# Attach the national life expectancy of the same year to every district row.
# validate="many_to_one" makes pandas raise a MergeError if the national table
# ever contains a repeated Year, instead of silently duplicating district rows.
# The default inner join keeps only years that are present in both tables.
le_final = pd.merge(le_district, le_nat, on="Year", validate="many_to_one")

# TODO: replacing NA values was planned here but is not implemented; any
# missing life expectancy propagates as NaN into le_factor.

# Make sure both life-expectancy columns are floats before dividing.
le_final = le_final.astype({"district_le": float, "nat_le": float})

# le_factor compares district-level with national-level life expectancy
# (district value divided by national value).
le_final["le_factor"] = le_final["district_le"] / le_final["nat_le"]

In [11]:
# Join the life-expectancy columns from le_final onto the district map rows,
# matching on district name and year. The keys are passed as a list, which is
# the documented form for a multi-column `on`.
# Default inner join: map rows without a matching (District, Year) pair in
# le_final are dropped, and vice versa.
df = pd.merge(df_map, le_final, on=["District", "Year"])

In [12]:
# Discard the two raw life-expectancy columns; every other column of df is
# carried over into df_final.
raw_le_columns = ["district_le", "nat_le"]
df_final = df.drop(raw_le_columns, axis="columns")

In [13]:
# Yearly cost per unit of burden, estimated from the nationwide, aggregated
# ICER data. The aggregate covers 2020-2024, hence the division by five years.
# NOTE(review): `burden` is presumably the number of cases, which would make
# these per-case rather than per-capita costs - confirm against the source.
N_YEARS = 5

# Look the scenarios up by their Treatment label instead of by row position,
# so a re-ordered input sheet cannot silently swap the two scenarios.
icer_by_treatment = icer.set_index("Treatment")
assert icer_by_treatment.index.is_unique, "duplicate Treatment labels in ICER table"

scenarios = ("non_vaccine", "vaccine")

# Total burden of each scenario, broadcast to every row.
for scenario in scenarios:
    df_final["burden_" + scenario] = icer_by_treatment.loc[scenario, "burden"]

# Column order matches the original cell: healthcare costs first, then payer
# costs, each with non_vaccine before vaccine.
for cost_kind in ("healthcare_cost", "payer_cost"):
    for scenario in scenarios:
        df_final[cost_kind + "_" + scenario] = (
            icer_by_treatment.loc[scenario, cost_kind]
            / df_final["burden_" + scenario]
            / N_YEARS
        )


In [14]:
# Step 2: province-level normalisation factors.
# Every row of df_final contributes equally to its province's mean.
province_groups = df_final.groupby("Province")
province_means = province_groups[["urbanicity", "Pop_den"]].mean()
province_means = province_means.rename(
    columns={"urbanicity": "mean_urbanicity", "Pop_den": "mean_pop_den"}
)

In [15]:
# Copy the province means onto every row of the matching province.
# Mean columns left over from an earlier run of this cell are dropped first,
# so re-running does not produce `_x` / `_y` suffixed duplicates that would
# break the factor calculation in the next cell.
df_final = df_final.drop(columns=province_means.columns, errors="ignore").merge(
    province_means, on="Province", how="left"
)

In [16]:
# Step 3: scaling factors, each a row value divided by a reference mean.
# Urbanicity and population density are compared with the province mean.
factor_specs = (
    ("urbanicity_factor", "urbanicity", "mean_urbanicity"),
    ("pop_den_factor", "Pop_den", "mean_pop_den"),
)
for factor_col, value_col, mean_col in factor_specs:
    df_final[factor_col] = df_final[value_col] / df_final[mean_col]

# Optional extra scaling dimension: observed prevalence (cases divided by
# population), normalised by its mean over all rows of df_final.
# Open note from the author: must compute by differences in the total cases
# (predicted).
observed_prevalence = df_final["Cases"] / df_final["Population"]
df_final["obs_prevalence"] = observed_prevalence
mean_obs_prev = df_final["obs_prevalence"].mean()
df_final["prev_factor"] = df_final["obs_prevalence"] / mean_obs_prev

In [17]:
# Yearly incremental cost per district: the vaccine-minus-non-vaccine cost gap,
# multiplied in turn by the prevalence, urbanicity and population-density
# factors. The factors are applied left to right in the same order for both
# cost types.
incremental_specs = (
    ("obs_incremental_healthcare_cost", "healthcare_cost_vaccine", "healthcare_cost_non_vaccine"),
    ("obs_incremental_payer_cost", "payer_cost_vaccine", "payer_cost_non_vaccine"),
)
for target_col, vaccine_col, baseline_col in incremental_specs:
    scaled_gap = df_final[vaccine_col] - df_final[baseline_col]
    for factor_col in ("prev_factor", "urbanicity_factor", "pop_den_factor"):
        scaled_gap = scaled_gap * df_final[factor_col]
    df_final[target_col] = scaled_gap

In [18]:
# Preview the cost table with the scaling factors and incremental costs.
df_final.head(n=5)

Unnamed: 0,District,Year,Area_sq_km,HDI,Province,Cases,Population,Pop_den,urbanicity,geometry,...,payer_cost_non_vaccine,payer_cost_vaccine,mean_urbanicity,mean_pop_den,urbanicity_factor,pop_den_factor,obs_prevalence,prev_factor,obs_incremental_healthcare_cost,obs_incremental_payer_cost
0,KOTA JAKARTA BARAT,2020,124.44,0.8138,DKI Jakarta,161,184826,1.485262,0.951917,"POLYGON ((106.70503 -6.0956, 106.70526 -6.0956...",...,8.402883,261.525217,0.939717,1.256437,1.012982,1.182122,0.000871,0.465595,141.14232,141.124821
1,KOTA JAKARTA BARAT,2021,124.44,0.8176,DKI Jakarta,138,183270,1.472758,0.951917,"POLYGON ((106.70503 -6.0956, 106.70526 -6.0956...",...,8.402883,261.525217,0.939717,1.256437,1.012982,1.17217,0.000753,0.40247,120.979131,120.964133
2,KOTA JAKARTA BARAT,2022,125.0,0.8251,DKI Jakarta,420,181715,1.45372,0.951917,"POLYGON ((106.70503 -6.0956, 106.70526 -6.0956...",...,8.402883,261.525217,0.939717,1.256437,1.012982,1.157018,0.002311,1.235391,366.547831,366.502388
3,KOTA JAKARTA BARAT,2023,125.0,0.8302,DKI Jakarta,359,180159,1.441272,0.951917,"POLYGON ((106.70503 -6.0956, 106.70526 -6.0956...",...,8.402883,261.525217,0.939717,1.256437,1.012982,1.14711,0.001993,1.065085,313.311122,313.272279
4,KOTA JAKARTA PUSAT,2020,52.38,0.8139,DKI Jakarta,127,73140,1.396334,0.993514,"POLYGON ((106.83247 -6.14138, 106.83282 -6.141...",...,8.402883,261.525217,0.939717,1.256437,1.057248,1.111344,0.001736,0.9281,276.060659,276.026434


In [19]:
# The burden columns and prev_factor have already been used to build the
# incremental-cost columns and are not needed any more, so remove them.
# DataFrame.drop returns a new frame: the result has to be assigned back,
# otherwise the columns stay in df_final and end up in the saved CSV.
# errors="ignore" keeps the cell safe to re-run once the columns are gone.
# Author's note: scale by ratio of dengue case counts (district to province
# level) --> using predicted and actual values.
df_final = df_final.drop(
    columns=["burden_non_vaccine", "burden_vaccine", "prev_factor"],
    errors="ignore",
)
df_final.head(10)

Unnamed: 0,District,Year,Area_sq_km,HDI,Province,Cases,Population,Pop_den,urbanicity,geometry,...,healthcare_cost_vaccine,payer_cost_non_vaccine,payer_cost_vaccine,mean_urbanicity,mean_pop_den,urbanicity_factor,pop_den_factor,obs_prevalence,obs_incremental_healthcare_cost,obs_incremental_payer_cost
0,KOTA JAKARTA BARAT,2020,124.44,0.8138,DKI Jakarta,161,184826,1.485262,0.951917,"POLYGON ((106.70503 -6.0956, 106.70526 -6.0956...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.012982,1.182122,0.000871,141.14232,141.124821
1,KOTA JAKARTA BARAT,2021,124.44,0.8176,DKI Jakarta,138,183270,1.472758,0.951917,"POLYGON ((106.70503 -6.0956, 106.70526 -6.0956...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.012982,1.17217,0.000753,120.979131,120.964133
2,KOTA JAKARTA BARAT,2022,125.0,0.8251,DKI Jakarta,420,181715,1.45372,0.951917,"POLYGON ((106.70503 -6.0956, 106.70526 -6.0956...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.012982,1.157018,0.002311,366.547831,366.502388
3,KOTA JAKARTA BARAT,2023,125.0,0.8302,DKI Jakarta,359,180159,1.441272,0.951917,"POLYGON ((106.70503 -6.0956, 106.70526 -6.0956...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.012982,1.14711,0.001993,313.311122,313.272279
4,KOTA JAKARTA PUSAT,2020,52.38,0.8139,DKI Jakarta,127,73140,1.396334,0.993514,"POLYGON ((106.83247 -6.14138, 106.83282 -6.141...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.057248,1.111344,0.001736,276.060659,276.026434
5,KOTA JAKARTA PUSAT,2021,52.38,0.8156,DKI Jakarta,54,73417,1.401623,0.993514,"POLYGON ((106.83247 -6.14138, 106.83282 -6.141...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.057248,1.115553,0.000736,117.380123,117.36557
6,KOTA JAKARTA PUSAT,2022,47.56,0.8211,DKI Jakarta,198,73694,1.549495,0.993514,"POLYGON ((106.83247 -6.14138, 106.83282 -6.141...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.057248,1.233245,0.002687,474.012329,473.953563
7,KOTA JAKARTA PUSAT,2023,47.56,0.8253,DKI Jakarta,152,73972,1.555341,0.993514,"POLYGON ((106.83247 -6.14138, 106.83282 -6.141...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.057248,1.237898,0.002055,363.888252,363.843139
8,KOTA JAKARTA SELATAN,2020,154.32,0.8472,DKI Jakarta,288,160968,1.043079,0.972088,"POLYGON ((106.83399 -6.20492, 106.83399 -6.204...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.034448,0.830188,0.001789,207.906672,207.880897
9,KOTA JAKARTA SELATAN,2021,154.32,0.849,DKI Jakarta,281,159201,1.031629,0.972088,"POLYGON ((106.83399 -6.20492, 106.83399 -6.204...",...,267.07511,8.402883,261.525217,0.939717,1.256437,1.034448,0.821075,0.001765,202.853385,202.828236


### Save the DataFrame for other ICER calculations

In [20]:
# Persist the cost table for the downstream ICER notebooks. The pandas default
# index=True applies, so the row index is written as the first, unnamed column.
cost_observed_path = "../data/processed/cost_observed.csv"
df_final.to_csv(cost_observed_path)