# In [None]:
import pandas as pd
import numpy as np
import os
from idd_forecast_mbp import constants as rfc
from idd_forecast_mbp.helper_functions import read_parquet_with_integer_ids, write_parquet


# Root directories, both taken from the project constants module.
PROCESSED_DATA_PATH = rfc.MODEL_ROOT / "02-processed_data"
FORECASTING_DATA_PATH = rfc.FORECASTING_DATA_PATH

# Derived locations (plain strings built with f-strings).
FHS_DATA_PATH = f"{PROCESSED_DATA_PATH}/age_specific_fhs"
age_metadata_path = f"{FHS_DATA_PATH}/age_metadata.parquet"
# Destination of the vaccine reduction table written at the end of the script.
dengue_vaccine_df_path = f"{FORECASTING_DATA_PATH}/dengue_vaccine_df.parquet"

# Age-group metadata; the code below reads its age_group_id, age_group_name,
# age_group_years_start and age_group_years_end columns.
age_metadata_df = read_parquet_with_integer_ids(age_metadata_path)

# Single-year-of-age template: one row per age 10..89 and one zero-filled
# float column per year 2023..2100.
by_age_df_template = pd.DataFrame(
    0.0,
    index=range(80),
    columns=[f'year_{year}' for year in range(2023, 2101)],
)
# 'age' goes first so the column order is age, year_2023, ..., year_2100.
by_age_df_template.insert(0, 'age', list(range(10, 90)))

# Keep only the age groups whose starting age lies in [10, 90).
sum_age_groups = age_metadata_df.loc[
    age_metadata_df['age_group_years_start'].ge(10)
    & age_metadata_df['age_group_years_start'].lt(90)
]

# Age-group template: the identifying metadata columns followed by one
# zero-filled column per year 2023..2100, one row per retained age group.
by_age_group_df_template = pd.DataFrame({
    **{
        column: list(sum_age_groups[column])
        for column in (
            'age_group_id',
            'age_group_name',
            'age_group_years_start',
            'age_group_years_end',
        )
    },
    **{
        f'year_{year}': [0.0] * len(sum_age_groups)
        for year in range(2023, 2101)
    },
})

def calculate_expanding_cohort_fraction(
    efficacy=0.844,
    coverage=0.9,
    year_start=2026,
    initial_min_age=10,
    initial_max_age=15,
    by_age_df_template=by_age_df_template,
    by_age_group_df_template=by_age_group_df_template,
):
    """Project vaccinated fractions for a cohort that ages and keeps expanding.

    In ``year_start`` every age in ``[initial_min_age, initial_max_age)`` is
    set to ``coverage``. For each later year up to 2100 the previous year's
    per-age values move one row down (the cohort is one year older) and the
    ``initial_min_age`` row is set to ``coverage`` again. Per-age values are
    then averaged (unweighted) within each age group, and the reduction
    factor is computed as ``1 - coverage_fraction * efficacy``.

    Args:
        efficacy: Multiplier applied to the covered fraction when computing
            the reduction factor.
        coverage: Fraction assigned to newly vaccinated ages.
        year_start: First year in which anyone is vaccinated.
        initial_min_age: Youngest age vaccinated in ``year_start`` and the
            only age vaccinated in each following year.
        initial_max_age: Exclusive upper age bound of the initial cohort.
        by_age_df_template: Frame with an ``age`` column and ``year_<yyyy>``
            columns. Ageing is done by shifting rows, so the rows are assumed
            to be consecutive single years of age in ascending order (true
            for the default template).
        by_age_group_df_template: Frame with an ``age_group_id`` column and
            ``year_<yyyy>`` columns.

    Returns:
        Tuple ``(by_age_df, coverage_by_age_group_df,
        reduction_by_age_group_df)``.

    Note:
        Age-group bounds are looked up in the module-level
        ``age_metadata_df``; an ``age_group_id`` missing from it raises
        ``IndexError``.
    """
    final_year = 2100  # last year column of the default templates

    by_age_df = by_age_df_template.copy()
    coverage_by_age_group_df = by_age_group_df_template.copy()
    reduction_by_age_group_df = by_age_group_df_template.copy()

    # Seed the initial cohort in the first vaccination year.
    first_year = f'year_{year_start}'
    in_initial_cohort = (
        (by_age_df['age'] >= initial_min_age)
        & (by_age_df['age'] < initial_max_age)
    )
    by_age_df.loc[in_initial_cohort, first_year] = coverage

    is_entry_age = by_age_df['age'] == initial_min_age
    for year in range(year_start + 1, final_year + 1):
        previous_year = f'year_{year - 1}'
        current_year = f'year_{year}'
        # Age everyone by one year. The youngest row has no predecessor, so
        # it is filled with 0.0 (unvaccinated). A bare shift(1) would leave
        # NaN there, and whenever initial_min_age is above the youngest
        # template age that NaN would creep into older rows year after year
        # and be silently skipped by the age-group means below.
        by_age_df[current_year] = by_age_df[previous_year].shift(1, fill_value=0.0)
        # Vaccinate the cohort entering at initial_min_age.
        by_age_df.loc[is_entry_age, current_year] = coverage

    # The age-group bounds and row masks do not depend on the year, so they
    # are resolved once instead of once per (year, age group) pair.
    group_selectors = []
    for age_group_id in coverage_by_age_group_df['age_group_id']:
        metadata_rows = age_metadata_df[age_metadata_df['age_group_id'] == age_group_id]
        age_group_start = metadata_rows['age_group_years_start'].values[0]
        age_group_end = metadata_rows['age_group_years_end'].values[0]
        ages_in_group = (
            (by_age_df['age'] >= age_group_start)
            & (by_age_df['age'] < age_group_end)
        )
        group_rows = coverage_by_age_group_df['age_group_id'] == age_group_id
        group_selectors.append((ages_in_group, group_rows))

    # Average the per-age coverage within each age group, year by year.
    for year in range(year_start, final_year + 1):
        year_column = f'year_{year}'
        for ages_in_group, group_rows in group_selectors:
            average_coverage = by_age_df.loc[ages_in_group, year_column].mean()
            coverage_by_age_group_df.loc[group_rows, year_column] = average_coverage

    # Turn covered fractions into multiplicative reduction factors.
    year_cols = [col for col in coverage_by_age_group_df.columns if col.startswith('year_')]
    for col in year_cols:
        reduction_by_age_group_df[col] = 1 - coverage_by_age_group_df[col] * efficacy

    return by_age_df, coverage_by_age_group_df, reduction_by_age_group_df

# Run the calculation with its default parameters. The three results are the
# per-age frame, the per-age-group coverage frame and the per-age-group
# reduction frame, in that order.
df, ag_df, red_df = calculate_expanding_cohort_fraction()

# Only the reduction frame is written out, to dengue_vaccine_df_path.
write_parquet(red_df, dengue_vaccine_df_path)