In [56]:
import xarray as xr # type: ignore
from pathlib import Path
import numpy as np # type: ignore
from typing import cast
import numpy.typing as npt # type: ignore
import pandas as pd # type: ignore
from typing import Literal, NamedTuple
import itertools
from rra_tools.shell_tools import mkdir # type: ignore
from idd_forecast_mbp import constants as rfc
from idd_forecast_mbp.helper_functions import merge_dataframes, read_income_paths, read_urban_paths, level_filter
from idd_forecast_mbp.parquet_functions import read_parquet_with_integer_ids, write_parquet

In [57]:
# Folder under the model root that holds this notebook's processed inputs and outputs.
PROCESSED_DATA_PATH = rfc.MODEL_ROOT / "02-processed_data"

# Location hierarchy: loaded right away; later cells filter it by `level`.
hierarchy_df_path = f'{PROCESSED_DATA_PATH}/full_hierarchy_lsae_1209.parquet'
hierarchy_df = read_parquet_with_integer_ids(hierarchy_df_path)

# All-age population file (only the path is defined here; it is read in a later cell).
aa_full_population_df_path = f"{PROCESSED_DATA_PATH}/aa_2023_full_population.parquet"

# Input CSVs, one per scenario (reference / better / worse).
reference_path = "/mnt/share/resource_tracking/forecasting/dah_channel_HFA/FGH_2024_submission_5_reference/dah_by_channel_hfa_recip_1990_2100.csv"
better_path = "/mnt/share/resource_tracking/forecasting/dah_channel_HFA/FGH_2024_submission_5_better/dah_by_channel_hfa_recip_1990_2100.csv"
worse_path = "/mnt/share/resource_tracking/forecasting/dah_channel_HFA/FGH_2024_submission_5_worse/dah_by_channel_hfa_recip_1990_2100.csv"

# Output parquet paths, one per scenario.
# NOTE: `dah_refernce_df_path` is misspelled, but the write cell refers to it
# by exactly this name, so the spelling is kept.
dah_refernce_df_path = f"{PROCESSED_DATA_PATH}/dah_reference_df.parquet"
dah_better_df_path = f"{PROCESSED_DATA_PATH}/dah_better_df.parquet"
dah_worse_df_path = f"{PROCESSED_DATA_PATH}/dah_worse_df.parquet"


def _load_malaria_dah(csv_path):
    """Read one scenario CSV and total its malaria DAH per year and location.

    Steps (the same for every scenario):
      1. read the CSV, using its first column as the index;
      2. keep rows where ``hfa == 'mal'`` and ``year >= 2000``;
      3. rename ``dah`` -> ``mal_DAH_total`` and ``year`` -> ``year_id``;
      4. sum ``mal_DAH_total`` within each (``year_id``, ``location_id``) pair.

    Returns a DataFrame with columns ``year_id``, ``location_id`` and
    ``mal_DAH_total``.
    """
    raw = pd.read_csv(csv_path, index_col=0)
    is_malaria_from_2000 = (raw['hfa'] == 'mal') & (raw['year'] >= 2000)
    malaria = raw[is_malaria_from_2000].rename(
        columns={'dah': 'mal_DAH_total', 'year': 'year_id'}
    )
    totals = malaria.groupby(['year_id', 'location_id'])[['mal_DAH_total']].sum()
    return totals.reset_index()


ref_df = _load_malaria_dah(reference_path)
better_df = _load_malaria_dah(better_path)
worse_df = _load_malaria_dah(worse_path)

In [58]:
# Location ids of hierarchy rows at level 3 (the "a0" set in this notebook),
# wrapped as a ('location_id', 'in', ids) filter tuple for the parquet reader.
a0_location_ids = hierarchy_df.loc[hierarchy_df['level'] == 3, 'location_id'].unique()
a0_filter = ('location_id', 'in', a0_location_ids)

# Same construction for hierarchy rows at level 5 (the "a2" set).
a2_location_ids = hierarchy_df.loc[hierarchy_df['level'] == 5, 'location_id'].unique()
a2_filter = ('location_id', 'in', a2_location_ids)

# Read the all-age population file twice, once per location set, passing the
# filter through the reader's `filters` argument.
a0_pop_df = read_parquet_with_integer_ids(
    aa_full_population_df_path,
    filters=[a0_filter],
)
a2_pop_df = read_parquet_with_integer_ids(
    aa_full_population_df_path,
    filters=[a2_filter],
)

In [59]:
def _attach_per_capita(dah_df):
    """Add ``mal_DAH_total_per_capita`` to a scenario frame keyed by A0 location.

    The frame is left-joined to ``a0_pop_df`` on (``location_id``, ``year_id``),
    ``location_id`` is renamed to ``A0_location_id``, the per-capita value is
    computed as ``mal_DAH_total / population``, and the ``population`` column is
    dropped again. Because the join is a left join, rows with no population
    match keep a missing population and therefore a missing per-capita value.
    """
    merged = dah_df.merge(a0_pop_df, on=['location_id', 'year_id'], how='left')
    merged = merged.rename(columns={'location_id': 'A0_location_id'})
    merged['mal_DAH_total_per_capita'] = merged['mal_DAH_total'] / merged['population']
    return merged.drop(columns=['population'])


# NOTE: each scenario name is rebound to its transformed frame, so this cell is
# not re-runnable on its own: a second run would try to merge on `location_id`,
# which has already been renamed. Re-run the load cell first.
ref_df = _attach_per_capita(ref_df)
better_df = _attach_per_capita(better_df)
worse_df = _attach_per_capita(worse_df)

In [60]:
# Give every a2 population row the A0_location_id recorded for its location in
# the hierarchy. This lookup is the same for all scenarios, so build it once.
a2_with_a0_df = a2_pop_df.merge(
    hierarchy_df[['location_id', 'A0_location_id']],
    on='location_id',
    how='left',
)

# Copy each scenario's A0-level values onto the a2 rows by matching on
# (A0_location_id, year_id). Left joins keep every a2 row; rows without a
# scenario match get missing values in the scenario columns.
scenario_keys = ['A0_location_id', 'year_id']
dah_reference_df = a2_with_a0_df.merge(ref_df, on=scenario_keys, how='left')
dah_better_df = a2_with_a0_df.merge(better_df, on=scenario_keys, how='left')
dah_worse_df = a2_with_a0_df.merge(worse_df, on=scenario_keys, how='left')

In [61]:
# Write one parquet file per scenario to the output paths set in the config cell.
# `dah_refernce_df_path` (sic) is the misspelled name that cell defines.
write_parquet(dah_reference_df, dah_refernce_df_path)
write_parquet(dah_better_df, dah_better_df_path)
# Left as a bare final expression: its return value is what the cell displays.
write_parquet(dah_worse_df, dah_worse_df_path)

✅ Metadata validation passed for /mnt/team/idd/pub/forecast-mbp/02-processed_data/dah_reference_df.parquet
✅ Metadata validation passed for /mnt/team/idd/pub/forecast-mbp/02-processed_data/dah_better_df.parquet
✅ Metadata validation passed for /mnt/team/idd/pub/forecast-mbp/02-processed_data/dah_worse_df.parquet


True