In [2]:
import pandas as pd
import numpy as np, os
import matplotlib.pyplot as plt

from pathlib import Path
import yaml
import re

import gbd_mapping as gm
from vivarium import Artifact

from db_queries import get_ids, get_outputs, get_population, get_covariate_estimates
from get_draws.api import get_draws

import vivarium_helpers as vh
import vivarium_helpers.id_helper as idh
from vivarium_helpers.vph_output.operations import VPHOperator

!date
!whoami
!pwd

Thu Oct 30 20:40:55 PDT 2025
ndbs
/mnt/share/code/ndbs/vivarium_research_alzheimers


# Load Needed Data

In [3]:
# Project directory
%cd /mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers/

/mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers


In [4]:
# Locations to process; comment a line in or out to change the selection
locations = [
    'United States of America',
    'Brazil',
    # 'China',
    # 'Germany',
    # 'Israel',
    # 'Japan',
    # 'Spain',
    # 'Sweden',
    # 'Taiwan (Province of China)',
    # 'United Kingdom',
]

# Shorter display names for plotting: every selected location maps to
# itself unless one of the abbreviations below overrides it
location_to_short_name = dict(zip(locations, locations))
location_to_short_name.update({
    'Taiwan (Province of China)': 'Taiwan',
    'United Kingdom': 'UK',
    'United States of America': 'USA',
})

# # Select a subset of locations to draw plots for
# locations_to_plot = locations[:2]

project_dir = '/mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers/'

# Model number used to build the artifact paths below
model_number = '8.3'
run_subdirectories = [
    'results/abie_consistent_model_test/united_states_of_america/2025_10_28_08_55_05/',
]
run_dirs = [f'{project_dir}{run_subdir}' for run_subdir in run_subdirectories]
results_dirs = [f'{run_dir}results/' for run_dir in run_dirs]

# # Option 1: One results directory per location
# location_to_results_dir = {
#     loc: path for loc, path in zip(locations, results_dirs)}

# Option 2: All locations in one results directory
location_to_results_dir = {'all': results_dirs[0]}

# Artifact file names are the lower-cased location names with spaces
# replaced by underscores
location_to_artifact_subdir = dict(
    zip(locations, (loc.lower().replace(' ', '_') for loc in locations)))
artifact_subpaths = [
    f'artifacts/model{model_number}/{subdir}.hdf'
    for subdir in location_to_artifact_subdir.values()
]
location_to_artifact_path = dict(
    zip(locations, (project_dir + subpath for subpath in artifact_subpaths)))
# Reverse lookup: artifact path -> location name
artifact_path_to_location = {
    path: loc for loc, path in location_to_artifact_path.items()}
artifact_path_to_location

{'/mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers/artifacts/model8.3/united_states_of_america.hdf': 'United States of America',
 '/mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers/artifacts/model8.3/brazil.hdf': 'Brazil'}

# Get list of draws and draw columns from `keyspace.yaml`, and reduce to a subset of draws to save memory and time

In [5]:
# keyspace.yaml is read from the first run directory; its 'input_draw'
# entry holds the draws used below
keyspace_path = Path(run_dirs[0] + 'keyspace.yaml')
keyspace = yaml.safe_load(keyspace_path.read_text())
draws = keyspace['input_draw']
print(draws)

[457, 169, 323, 392, 346]


In [6]:
# Keep at most the first 10 draws (to save memory) and sort them.
# I ended up never actually using this -- I just used all the draws
draws = draws[:10]
draws.sort()
# Column names of the form 'draw_<n>', one per retained draw
draw_cols = list(map('draw_{}'.format, draws))
print(draw_cols)

['draw_169', 'draw_323', 'draw_346', 'draw_392', 'draw_457']


# Load one artifact and define age bins

In [7]:
usa_artifact_path = location_to_artifact_path['United States of America']
usa_art = Artifact(usa_artifact_path)
# print(usa_art.load('metadata.locations'))
# print(usa_art)

In [8]:
# age_bins is an empty DataFrame with a MultiIndex storing age group data
age_bins = usa_art.load('population.age_bins')
# Move the index levels into columns and derive an underscore-separated
# `age_group` label from `age_group_name`
age_dictionary = age_bins.reset_index()
age_dictionary['age_group'] = age_dictionary['age_group_name'].str.replace(' ', '_')
# Filter to ages that actually appear in our sim
age_dictionary = age_dictionary.loc[age_dictionary['age_start'] >= 25]
#age_dictionary

In [9]:
# Build `scale`: the population structure of every location, tagged with the
# location name, a per-location `ratio`, and age group labels, with the 2050
# rows repeated for each year 2051-2099.
# Frames are collected in a list and concatenated once at the end rather
# than growing a DataFrame with pd.concat inside the loops.
scale_frames = []
for location in locations:
    artifact_path = location_to_artifact_path[location]
    art = Artifact(artifact_path)
    # Load the population structure once and reuse it for both steps below
    pop_structure = art.load('population.structure')
    temp = pop_structure.reset_index()
    temp['location'] = location

    # Pair the 2025 scaling factor with the 2025 population structure; draw
    # columns present in both get the '_prev' / '_pop' suffixes
    df_prev_pop = pd.merge(
        art.load('population.scaling_factor').query("year_start == 2025"),
        pop_structure.query("year_start==2025").droplevel(['year_start', 'year_end']),
        left_index=True,
        right_index=True,
        suffixes=['_prev', '_pop']
    )
    # Multiply the '_prev' draw columns by the '_pop' draw columns
    # (positionally, via .values), average across draws, then sum over rows
    prev = ((df_prev_pop.filter(like='draw_').filter(like='_prev')
            * df_prev_pop.filter(like='draw_').filter(like='_pop').values).mean(axis=1)).sum(axis=0)
    # TODO: use draw-specific scale instead of mean

    # NOTE(review): 100_000 is presumably the simulated population size per
    # location -- confirm against the model specification
    ratio = 100_000 / prev
    print(ratio)

    temp['ratio'] = ratio

    temp = temp.rename(columns={'year_start': 'event_year'})
    temp = temp.merge(age_dictionary, on=['age_start','age_end'])
    # Repeat the 2050 rows for every year 2051-2099
    mini = temp.loc[temp['event_year'] == 2050]
    later_years = [mini.assign(event_year=year) for year in range(2051, 2100)]
    scale_frames.append(pd.concat([temp, *later_years], ignore_index=True))

# pd.concat raises on an empty list, so fall back to an empty frame when
# no locations are selected
scale = pd.concat(scale_frames, ignore_index=True) if scale_frames else pd.DataFrame()
#scale.head()

0.02068399173333995
0.05127553537168728


# Create VPHOperator object to perform operations on simulation output

In [10]:
# Operator object used by summarize_sim_data (via ops.describe)
ops = VPHOperator()
# Add 'location' to this operator's index columns
ops.index_cols.append('location')
ops.index_cols

# Display the module-level INDEX_COLUMNS -- presumably to check that the
# append above did not modify the shared default list
vh.vph_output.operations.INDEX_COLUMNS

['input_draw', 'scenario']

# BBBM Test Counts

In [11]:
def load_sim_output(
        measure,
        results_dict=location_to_results_dir,
        artifact_path_to_location=artifact_path_to_location,
        drop_superfluous_cols=True, # drop redundant or empty columns
        force_categorical=True,
        aggregate_seeds=True,
        raw=False, # Overrides other parameters if True
        **kwargs, # keyword args to pass to .read_parquet
    ):
    """Read `<measure>.parquet` from each results directory and stack
    the results into one DataFrame.

    Parameters
    ----------
    measure : name of the parquet file (without extension) to read from
        every directory in `results_dict`.
    results_dict : mapping whose values are results directories.
    artifact_path_to_location : mapping used to translate the
        'artifact_path' column into a 'location' column.
    drop_superfluous_cols : drop 'input_draw_number' / 'sub_entity' when
        they duplicate 'input_draw' / 'entity', and drop all-NaN columns.
    force_categorical : convert non-float columns to categorical.
    aggregate_seeds : marginalize over 'random_seed'.
    raw : if True, turns off the three processing options above.
    **kwargs : forwarded to `pd.read_parquet`.

    Returns
    -------
    The concatenated outputs. A 'location' column is added only to
    frames that have an 'artifact_path' column; paths missing from
    `artifact_path_to_location` map to NaN.
    """
    # raw=True switches off every post-processing step
    drop_superfluous_cols = drop_superfluous_cols and not raw
    force_categorical = force_categorical and not raw
    aggregate_seeds = aggregate_seeds and not raw

    # (column to keep, column to drop when it is an exact duplicate)
    redundant_pairs = (
        ('input_draw', 'input_draw_number'),
        ('entity', 'sub_entity'),
    )

    loaded = []
    for directory in results_dict.values():
        data = pd.read_parquet(Path(directory) / f'{measure}.parquet', **kwargs)
        if drop_superfluous_cols:
            for kept, duplicate in redundant_pairs:
                if kept in data and duplicate in data and data[kept].equals(data[duplicate]):
                    data = data.drop(columns=duplicate)
            # Columns that contain nothing but missing values
            all_missing = [col for col in data if data[col].isna().all()]
            data = data.drop(columns=all_missing)
        if force_categorical:
            convert_to_categorical(data, inplace=True)
        if aggregate_seeds:
            # Use default index and value columns when aggregating
            data = vh.vph_output.operations.marginalize(data, 'random_seed')
        if 'artifact_path' in data:
            data['location'] = data['artifact_path'].map(artifact_path_to_location)
        loaded.append(data)
    return pd.concat(loaded)

# TODO: Consider making certain columns ordered Categoricals
def convert_to_categorical(df, inplace=False):
    """Convert all columns except float columns to categorical. This
    saves lots of memory, allowing us to load and manipulate larger
    DataFrames.

    Float columns of any width (float16/32/64 and pandas nullable
    floats) are left untouched, as are columns that are already
    categorical.

    Parameters
    ----------
    df : DataFrame to convert.
    inplace : if True, modify `df` itself and return None; otherwise
        convert and return a copy, leaving `df` unchanged.
    """
    if not inplace:
        df = df.copy()
    for col in df:
        dtype = df[col].dtype
        # Comparing the dtype against the string 'float' only matches
        # float64, so test the dtype kind explicitly instead
        if pd.api.types.is_float_dtype(dtype) or isinstance(dtype, pd.CategoricalDtype):
            continue
        df[col] = df[col].astype('category')
    return None if inplace else df

# NOTE: Differs from version in Vivarium Helpers in that here,
# dropna=False
def marginalize(
    df:pd.DataFrame,
    marginalized_cols,
    value_cols=None,
    reset_index=True,
    func='sum',
    args=(), # Positional args to pass to func in DataFrameGroupBy.agg
    **kwargs, # Keywords to pass to DataFrameGroupBy.agg
)->pd.DataFrame:
    """Aggregate `value_cols` over `marginalized_cols`, grouping by
    every remaining column and keeping groups whose keys contain NaN
    (dropna=False).

    Parameters
    ----------
    df : data to aggregate.
    marginalized_cols : column or index-level name(s) to aggregate over.
    value_cols : column(s) to aggregate; defaults to
        `vh.vph_output.operations.value_col`.
    reset_index : if True, group keys are returned as columns instead
        of as the index.
    func, args, kwargs : passed to `DataFrameGroupBy.agg`.
    """
    if value_cols is None:
        value_cols = vh.vph_output.operations.value_col
    marginalized_cols = vh.utils._ensure_iterable(marginalized_cols)
    value_cols = vh.utils._ensure_iterable(value_cols)
    # Index levels named in marginalized_cols are moved into columns so
    # that level names can be passed as well as column names
    df = vh.utils._ensure_columns_not_levels(df, marginalized_cols)
    excluded = [*marginalized_cols, *value_cols]
    # groupby needs a plain list, not an Index
    groupby_cols = df.columns.difference(excluded).to_list()
    grouped = df.groupby(
        groupby_cols,
        as_index=(not reset_index),
        observed=True, # needed for Categorical data
        dropna=False,
    )
    return grouped[value_cols].agg(func, *args, **kwargs)

def summarize_sim_data(df, age_dictionary=age_dictionary):
    """Summarize simulation data for plotting.

    When `df` has an 'age_group' column it is first merged with
    `age_dictionary` on that column (giving an age_start column for
    plotting). The result of `ops.describe` is returned with the
    '2.5%' and '97.5%' columns renamed to 'lower' and 'upper' to match
    the artifact.
    """
    has_age_group = 'age_group' in df
    merged = df.merge(age_dictionary, on='age_group') if has_age_group else df
    percentile_names = {'2.5%': 'lower', '97.5%': 'upper'}
    return ops.describe(merged).rename(columns=percentile_names)

In [12]:
def dataframe_beutification_and_summarizing(df, measure_name):
    """Scale simulation counts, relabel columns for reporting, and
    summarize across draws.

    Parameters
    ----------
    df : simulation output with 'event_year', 'location', 'sex',
        'age_group', 'input_draw', 'value' and a categorical 'scenario'
        column. The caller's DataFrame is not modified.
    measure_name : label written to the 'Measure' column.

    Returns
    -------
    DataFrame with one row per Year ID / Location / Age / Sex /
    Disease Stage / Scenario / Measure / Metric, and the columns
    'Mean', '95% UI Lower' and '95% UI Upper' computed across
    `input_draw`.

    Notes
    -----
    Reads the module-level `scale` table. Rows with no matching
    location / sex / age_group / event_year in `scale` are dropped by
    the inner merge.
    """
    # Add in the scale factor multiplication.
    # `.assign` builds a new frame, so the caller's data keeps its dtype.
    df = df.assign(event_year=df['event_year'].astype(int))
    df = df.merge(
        scale[['location','sex','age_group','ratio','event_year']],
        on=['location','sex','age_group','event_year'])
    df['value'] = df['value'] / df['ratio']

    # Need to set this up for number and rate to be included
    df['Metric'] = 'Number'
    df_rate = df.copy()
    # NOTE(review): this divides the scaled count by 100,000 and uses no
    # population denominator -- confirm this is the intended definition
    # of 'Rate per 100,000'
    df_rate['value'] = df_rate['value'] / 100_000
    df_rate['Metric'] = 'Rate per 100,000'
    df = pd.concat([df, df_rate], ignore_index=True)

    # Renaming, dropping columns, and recategorising
    df = df.rename(columns={'event_year': 'Year ID',
                            'age_group': 'Age',
                            'location': 'Location',
                            'sex':'Sex',
                            'scenario':'Scenario',
                            'sub_entity':'Disease Stage'})
    df['Measure'] = measure_name
    df['Scenario'] = df['Scenario'].cat.rename_categories({
        'baseline': 'Reference',
        'bbbm_testing': 'BBBM Testing Only',
        'bbbm_testing_and_treatment' : 'BBBM Testing and Treatment'
    })
    df['Disease Stage'] = 'Preclinical AD'

    # Now we summarize the data: first sum within each draw, then describe
    # across draws. observed=True keeps only the combinations of categorical
    # keys that actually occur in the data.
    group_cols = ['Year ID', 'Location', 'Age', 'Sex' , 'Disease Stage' , 'Scenario', 'Measure', 'Metric']
    df = df.groupby(group_cols + ['input_draw'], observed=True).value.sum().reset_index()
    df = df.groupby(group_cols, observed=True).value.describe(percentiles=[0.025,0.975]).reset_index()

    df = df.rename(columns={'mean': 'Mean',
                            '2.5%': '95% UI Lower',
                            '97.5%': '95% UI Upper'})

    # Reorder the columns in df
    column_order = group_cols + ['Mean', '95% UI Lower', '95% UI Upper']
    df = df[column_order]

    return df

In [13]:
bbbm_tests = load_sim_output(
    'counts_bbbm_tests',
    )
bbbm_tests.head()

Unnamed: 0,age_group,artifact_path,bbbm_test_results,entity,entity_type,event_year,input_draw,measure,scenario,sex,value,location
0,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,not_tested,bbbm_testing,testing,2025,392,counts_bbbm_tests,baseline,Female,0.0,
1,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,not_tested,bbbm_testing,testing,2025,392,counts_bbbm_tests,baseline,Male,0.0,
2,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,not_tested,bbbm_testing,testing,2025,392,counts_bbbm_tests,bbbm_testing,Female,0.0,
3,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,not_tested,bbbm_testing,testing,2025,392,counts_bbbm_tests,bbbm_testing,Male,0.0,
4,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,not_tested,bbbm_testing,testing,2025,392,counts_bbbm_tests,bbbm_testing_and_treatment,Female,0.0,


In [14]:
bbbm_tests_final = dataframe_beutification_and_summarizing(bbbm_tests, 'BBBM Test Counts')

In [15]:
bbbm_tests_final.loc[(bbbm_tests_final['Year ID'] == 2050) & (bbbm_tests_final['Age'] == '65_to_69') & (bbbm_tests_final['Sex'] == 'Female') & (bbbm_tests_final['Metric'] == 'Number')]

## Need to find an old V&V value to compare to. Can't find one quickly.

Unnamed: 0,Year ID,Location,Age,Sex,Disease Stage,Scenario,Measure,Metric,Mean,95% UI Lower,95% UI Upper
9096,2050,Brazil,65_to_69,Female,Preclinical AD,Reference,BBBM Test Counts,Number,0.0,0.0,0.0
9098,2050,Brazil,65_to_69,Female,Preclinical AD,BBBM Testing Only,BBBM Test Counts,Number,10628.850504,4899.022471,17281.145747
9100,2050,Brazil,65_to_69,Female,Preclinical AD,BBBM Testing and Treatment,BBBM Test Counts,Number,10628.850504,4899.022471,17281.145747
9276,2050,United States of America,65_to_69,Female,Preclinical AD,Reference,BBBM Test Counts,Number,0.0,0.0,0.0
9278,2050,United States of America,65_to_69,Female,Preclinical AD,BBBM Testing Only,BBBM Test Counts,Number,12057.633904,7730.616124,23022.635386
9280,2050,United States of America,65_to_69,Female,Preclinical AD,BBBM Testing and Treatment,BBBM Test Counts,Number,12057.633904,7730.616124,23022.635386


In [16]:
# NEED TO ADD IN BBBM TESTS FROM MSLT HERE 

In [17]:
# prevalence_final.to_csv('/ihme/homes/lutzes/vivarium_research_alzheimers/2025_10_28_prevalence_final.csv')

# CSF and PET Testing

In [18]:
csf_pet_tests = load_sim_output(
    'counts_baseline_tests_among_eligible',
    )
csf_pet_tests.head()

Unnamed: 0,age_group,artifact_path,entity,entity_type,event_year,input_draw,measure,scenario,sex,testing_state,value,location
0,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,baseline_testing,testing,2025,392,counts_baseline_tests_among_eligible,baseline,Female,not_tested,0.0,
1,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,baseline_testing,testing,2025,392,counts_baseline_tests_among_eligible,baseline,Female,csf,0.0,
2,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,baseline_testing,testing,2025,392,counts_baseline_tests_among_eligible,baseline,Female,pet,0.0,
3,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,baseline_testing,testing,2025,392,counts_baseline_tests_among_eligible,baseline,Female,bbbm,0.0,
4,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,baseline_testing,testing,2025,392,counts_baseline_tests_among_eligible,baseline,Male,not_tested,0.0,


In [19]:
def dataframe_beutification_and_summarizing(df, measure_name):
    """Summarize CSF and PET test counts, plus the tests averted
    relative to the baseline scenario.

    Parameters
    ----------
    df : simulation output with a categorical 'testing_state' column
        and 'measure', 'scenario', 'artifact_path', 'entity_type',
        'age_group', 'event_year', 'location', 'sex', 'input_draw' and
        'value' columns. The caller's DataFrame is not modified.
    measure_name : unused; the 'Measure' labels come from
        'testing_state'. Kept so the signature matches the other
        summarizing functions in this notebook.

    Returns
    -------
    DataFrame with one row per Year ID / Location / Age / Sex /
    Disease Stage / Scenario / Measure / Metric, and the columns
    'Mean', '95% UI Lower' and '95% UI Upper' computed across
    `input_draw`. 'Measure' takes the values 'CSF Test Counts',
    'PET Test Counts' and their 'Averted ...' counterparts.

    Notes
    -----
    Reads the module-level `scale` table. Rows with no matching
    location / sex / age_group / event_year in `scale` are dropped by
    the inner merge.
    """
    # Keep only the CSF/PET rows. `.assign` returns a new frame, so nothing
    # is written to a slice of the caller's data.
    df = (
        df.loc[df.testing_state.isin(['csf','pet'])]
        .assign(testing_state=lambda d: d['testing_state'].cat.remove_unused_categories())
        .drop(columns=['measure'])
        .rename(columns={'testing_state':'Measure'})
    )

    df['Measure'] = df['Measure'].cat.rename_categories({
        'csf': 'CSF Test Counts',
        'pet': 'PET Test Counts'
    })

    # New measures 'Averted CSF Test Counts' and 'Averted PET Test Counts':
    # baseline value minus the value in each scenario, matched on every key
    # except the scenario itself
    averted_keys = ['artifact_path', 'entity_type', 'age_group','event_year','location','sex','input_draw','Measure']
    df_baseline = df.loc[df['scenario'] == 'baseline']
    df_baseline = df_baseline.rename(columns={'value':'baseline_value'})

    df_averted = df.rename(columns={'value':'all_value'})
    df_averted = df_averted.merge(
        df_baseline[averted_keys + ['baseline_value']],
        on=averted_keys)
    df_averted['value'] = df_averted['baseline_value'] - df_averted['all_value']
    df_averted['Measure'] = df_averted['Measure'].cat.rename_categories({
        'CSF Test Counts': 'Averted CSF Test Counts',
        'PET Test Counts': 'Averted PET Test Counts'
    })
    df = pd.concat([df, df_averted], ignore_index=True)

    # Add in the scale factor multiplication
    df['event_year'] = df['event_year'].astype(int)
    df = df.merge(
        scale[['location','sex','age_group','ratio','event_year']],
        on=['location','sex','age_group','event_year'])
    df['value'] = df['value'] / df['ratio']

    # Need to set this up for number and rate to be included
    df['Metric'] = 'Number'
    df_rate = df.copy()
    # NOTE(review): this divides the scaled count by 100,000 and uses no
    # population denominator -- confirm this is the intended definition
    # of 'Rate per 100,000'
    df_rate['value'] = df_rate['value'] / 100_000
    df_rate['Metric'] = 'Rate per 100,000'
    df = pd.concat([df, df_rate], ignore_index=True)

    # Renaming, dropping columns, and recategorising
    # ('testing_state' was already renamed to 'Measure' above)
    df = df.rename(columns={'event_year': 'Year ID',
                            'age_group': 'Age',
                            'location': 'Location',
                            'sex':'Sex',
                            'scenario':'Scenario'})
    df['Scenario'] = df['Scenario'].cat.rename_categories({
        'baseline': 'Reference',
        'bbbm_testing': 'BBBM Testing Only',
        'bbbm_testing_and_treatment' : 'BBBM Testing and Treatment'
    })
    df['Disease Stage'] = 'MCI due to AD'

    # Now we summarize the data: first sum within each draw, then describe
    # across draws. observed=True keeps only the combinations of categorical
    # keys that actually occur in the data.
    group_cols = ['Year ID', 'Location', 'Age', 'Sex', 'Disease Stage', 'Scenario', 'Measure', 'Metric']
    df = df.groupby(group_cols + ['input_draw'], observed=True).value.sum().reset_index()
    df = df.groupby(group_cols, observed=True).value.describe(percentiles=[0.025,0.975]).reset_index()

    df = df.rename(columns={'mean': 'Mean',
                            '2.5%': '95% UI Lower',
                            '97.5%': '95% UI Upper'})

    # Reorder the columns in df
    column_order = group_cols + ['Mean', '95% UI Lower', '95% UI Upper']
    df = df[column_order]

    return df

In [20]:
csf_pet_tests_final = dataframe_beutification_and_summarizing(csf_pet_tests, 'CSF and PET Test Counts')

In [21]:
csf_pet_tests_final

Unnamed: 0,Year ID,Location,Age,Sex,Disease Stage,Scenario,Measure,Metric,Mean,95% UI Lower,95% UI Upper
0,2025,Brazil,25_to_29,Female,MCI due to AD,Reference,Averted CSF Test Counts,Number,0.000000,0.000000,0.000000
1,2025,Brazil,25_to_29,Female,MCI due to AD,Reference,Averted CSF Test Counts,"Rate per 100,000",0.000000,0.000000,0.000000
2,2025,Brazil,25_to_29,Female,MCI due to AD,Reference,Averted PET Test Counts,Number,0.000000,0.000000,0.000000
3,2025,Brazil,25_to_29,Female,MCI due to AD,Reference,Averted PET Test Counts,"Rate per 100,000",0.000000,0.000000,0.000000
4,2025,Brazil,25_to_29,Female,MCI due to AD,Reference,CSF Test Counts,Number,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
51835,2060,United States of America,95_plus,Male,MCI due to AD,BBBM Testing and Treatment,Averted PET Test Counts,"Rate per 100,000",0.000193,0.000000,0.000483
51836,2060,United States of America,95_plus,Male,MCI due to AD,BBBM Testing and Treatment,CSF Test Counts,Number,154.709016,53.181224,285.244748
51837,2060,United States of America,95_plus,Male,MCI due to AD,BBBM Testing and Treatment,CSF Test Counts,"Rate per 100,000",0.001547,0.000532,0.002852
51838,2060,United States of America,95_plus,Male,MCI due to AD,BBBM Testing and Treatment,PET Test Counts,Number,222.394210,53.181224,502.804301


In [22]:
csf_pet_tests_final.loc[(csf_pet_tests_final['Year ID'] == 2050) & (csf_pet_tests_final['Age'] == '65_to_69') & (csf_pet_tests_final['Sex'] == 'Female') & (csf_pet_tests_final['Metric'] == 'Number')]

## Again, could not quickly locate an old V&V value to compare to. But appears reasonable?

Unnamed: 0,Year ID,Location,Age,Sex,Disease Stage,Scenario,Measure,Metric,Mean,95% UI Lower,95% UI Upper
36384,2050,Brazil,65_to_69,Female,MCI due to AD,Reference,Averted CSF Test Counts,Number,0.0,0.0,0.0
36386,2050,Brazil,65_to_69,Female,MCI due to AD,Reference,Averted PET Test Counts,Number,0.0,0.0,0.0
36388,2050,Brazil,65_to_69,Female,MCI due to AD,Reference,CSF Test Counts,Number,1404.178415,1033.631333,1934.645817
36390,2050,Brazil,65_to_69,Female,MCI due to AD,Reference,PET Test Counts,Number,1665.51162,653.333013,2827.859309
36392,2050,Brazil,65_to_69,Female,MCI due to AD,BBBM Testing Only,Averted CSF Test Counts,Number,1014.128855,587.024587,1255.959583
36394,2050,Brazil,65_to_69,Female,MCI due to AD,BBBM Testing Only,Averted PET Test Counts,Number,1248.158591,548.019632,1989.252755
36396,2050,Brazil,65_to_69,Female,MCI due to AD,BBBM Testing Only,CSF Test Counts,Number,390.04956,218.427753,696.238464
36398,2050,Brazil,65_to_69,Female,MCI due to AD,BBBM Testing Only,PET Test Counts,Number,417.353029,105.313381,838.606554
36400,2050,Brazil,65_to_69,Female,MCI due to AD,BBBM Testing and Treatment,Averted CSF Test Counts,Number,1014.128855,587.024587,1255.959583
36402,2050,Brazil,65_to_69,Female,MCI due to AD,BBBM Testing and Treatment,Averted PET Test Counts,Number,1248.158591,548.019632,1989.252755


In [23]:
# FIXME: PermissionError -- need to save to different location
# csf_pet_tests_final.to_csv('/ihme/homes/lutzes/vivarium_research_alzheimers/2025_10_28_csf_pet_tests_final.csv')

# Medication Counts

We need counts of medication initiation, discontinuation, and completion.

In [24]:
!ls /mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers/results/model7.4/united_states_of_america/2025_10_24_16_02_54/results/

counts_baseline_tests_among_eligible.parquet
counts_bbbm_tests.parquet
counts_new_simulants.parquet
counts_newly_eligible_for_bbbm_testing.parquet
deaths.parquet
person_time_alzheimers_disease_and_other_dementias.parquet
person_time_eligible_for_bbbm_testing.parquet
person_time_ever_eligible_for_bbbm_testing.parquet
person_time_treatment.parquet
transition_count_alzheimers_disease_and_other_dementias.parquet
transition_count_treatment.parquet
ylds.parquet
ylls.parquet


In [25]:
medication = load_sim_output(
    'transition_count_treatment',
    )
medication.head()

Unnamed: 0,age_group,artifact_path,entity,entity_type,event_year,input_draw,measure,scenario,sex,sub_entity,value,location
0,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,baseline,Female,waiting_for_treatment_to_full_effect_long,0.0,
1,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,baseline,Female,waiting_for_treatment_to_full_effect_short,0.0,
2,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,baseline,Female,full_effect_long_to_waning_effect_long,0.0,
3,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,baseline,Female,full_effect_short_to_waning_effect_short,0.0,
4,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,baseline,Female,waning_effect_long_to_no_effect_after_long,0.0,


In [26]:
medication.sub_entity.unique()
# Medication initiation is "waiting_for_treatment_to_full_effect_long" and "waiting_for_treatment_to_full_effect_short"
# Medication discontinuation is "waning_effect_short_to_no_effect_after_short"
# Medication completion is "waning_effect_long_to_no_effect_after_long"

# Note: the discontinuation and completion won't perfectly sum to initiation because some people may die while on treatment

['waiting_for_treatment_to_full_effect_long', 'waiting_for_treatment_to_full_effect_short', 'full_effect_long_to_waning_effect_long', 'full_effect_short_to_waning_effect_short', 'waning_effect_long_to_no_effect_after_long', 'waning_effect_short_to_no_effect_after_short', 'susceptible_to_treatment_to_waiting_for_treat..., 'susceptible_to_treatment_to_no_effect_never_t...]
Categories (8, object): ['full_effect_long_to_waning_effect_long', 'full_effect_short_to_waning_effect_short', 'susceptible_to_treatment_to_no_effect_never_t..., 'susceptible_to_treatment_to_waiting_for_treat..., 'waiting_for_treatment_to_full_effect_long', 'waiting_for_treatment_to_full_effect_short', 'waning_effect_long_to_no_effect_after_long', 'waning_effect_short_to_no_effect_after_short']

In [27]:
medication.loc[medication.sub_entity.isin(['waning_effect_long_to_no_effect_after_long'])]


Unnamed: 0,age_group,artifact_path,entity,entity_type,event_year,input_draw,measure,scenario,sex,sub_entity,value,location
4,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,baseline,Female,waning_effect_long_to_no_effect_after_long,0.0,
12,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,baseline,Male,waning_effect_long_to_no_effect_after_long,0.0,
20,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,bbbm_testing,Female,waning_effect_long_to_no_effect_after_long,0.0,
28,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,bbbm_testing,Male,waning_effect_long_to_no_effect_after_long,0.0,
36,25_to_29,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2025,392,transition_count,bbbm_testing_and_treatment,Female,waning_effect_long_to_no_effect_after_long,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
1295964,95_plus,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2060,346,transition_count,baseline,Male,waning_effect_long_to_no_effect_after_long,0.0,
1295972,95_plus,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2060,346,transition_count,bbbm_testing,Female,waning_effect_long_to_no_effect_after_long,0.0,
1295980,95_plus,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2060,346,transition_count,bbbm_testing,Male,waning_effect_long_to_no_effect_after_long,0.0,
1295988,95_plus,/mnt/team/simulation_science/pub/models/vivari...,treatment,cause,2060,346,transition_count,bbbm_testing_and_treatment,Female,waning_effect_long_to_no_effect_after_long,0.0,


In [35]:
def dataframe_beutification_and_summarizing(df, measure_name):
    """Summarize medication initiation, completion and discontinuation
    counts from treatment transition counts.

    Parameters
    ----------
    df : simulation output with a categorical 'sub_entity' column of
        treatment transitions, and 'measure', 'scenario', 'age_group',
        'event_year', 'location', 'sex', 'input_draw' and 'value'
        columns. The caller's DataFrame is not modified.
    measure_name : unused; the 'Measure' labels are derived from
        'sub_entity'. Kept so the signature matches the other
        summarizing functions in this notebook.

    Returns
    -------
    DataFrame with one row per Year ID / Location / Age / Sex /
    Disease Stage / Scenario / Measure / Metric, and the columns
    'Mean', '95% UI Lower' and '95% UI Upper' computed across
    `input_draw`.

    Notes
    -----
    Reads the module-level `scale` table. Rows with no matching
    location / sex / age_group / event_year in `scale` are dropped by
    the inner merge.

    NOTE(review): only the two initiation transitions are used. The
    long-course initiations are labelled 'Medication Completion Counts'
    and the short-course initiations 'Medication Discontinuation
    Counts', so both are counted when treatment starts rather than
    when it ends -- confirm this is the intended definition.
    """
    # Label given to each initiation transition
    initiation_transitions = {
        'waiting_for_treatment_to_full_effect_long': 'Medication Completion Counts',
        'waiting_for_treatment_to_full_effect_short': 'Medication Discontinuation Counts',
    }
    # Keep only the initiation rows. `.assign` returns a new frame, so
    # nothing is written to a slice of the caller's data.
    df = (
        df.loc[df.sub_entity.isin(list(initiation_transitions))]
        .assign(sub_entity=lambda d: d['sub_entity'].cat.remove_unused_categories())
        .drop(columns=['measure'])
        .rename(columns={'sub_entity':'Measure'})
    )

    # Define a Categorical dtype with all 3 categories to preserve
    # Categoricals upon concatenation
    medication_count_dtype = pd.CategoricalDtype(
        ['Medication Initiation Counts', 'Medication Completion Counts', 'Medication Discontinuation Counts'])
    # rename_categories relabels the categories directly; calling `replace`
    # on a Categorical to change its categories is deprecated in pandas 2.2
    df['Measure'] = (
        df['Measure'].cat.rename_categories(initiation_transitions)
        .astype(medication_count_dtype)
    )
    # initiation counts = completion counts + discontinuation counts, so
    # we copy the dataframe and label every row 'initiation' so
    # they'll be added together when we do the groupby below
    df_initiation = (
        df.assign(Measure='Medication Initiation Counts')
        .astype({'Measure': medication_count_dtype})
    )
    df = pd.concat([df, df_initiation], ignore_index=True)

    # Add in the scale factor multiplication
    df['event_year'] = df['event_year'].astype(int)
    df = df.merge(
        scale[['location','sex','age_group','ratio','event_year']],
        on=['location','sex','age_group','event_year'])
    df['value'] = df['value'] / df['ratio']

    # Need to set this up for number and rate to be included
    df['Metric'] = 'Number'
    df_rate = df.copy()
    # NOTE(review): this divides the scaled count by 100,000 and uses no
    # population denominator -- confirm this is the intended definition
    # of 'Rate per 100,000'
    df_rate['value'] = df_rate['value'] / 100_000
    df_rate['Metric'] = 'Rate per 100,000'
    df = pd.concat([df, df_rate], ignore_index=True)

    # Renaming, dropping columns, and recategorising
    df = df.rename(columns={'event_year': 'Year ID',
                            'age_group': 'Age',
                            'location': 'Location',
                            'sex':'Sex',
                            'scenario':'Scenario'})
    df['Scenario'] = df['Scenario'].cat.rename_categories({
        'baseline': 'Reference',
        'bbbm_testing': 'BBBM Testing Only',
        'bbbm_testing_and_treatment' : 'BBBM Testing and Treatment'
    })
    df['Disease Stage'] = 'Preclinical AD'

    # Now we summarize the data: first sum within each draw (this is where
    # the two initiation labels are added together), then describe across draws
    group_cols = ['Year ID', 'Location', 'Age', 'Sex', 'Disease Stage', 'Scenario', 'Measure', 'Metric']
    df = df.groupby(group_cols + ['input_draw'], observed=True).value.sum().reset_index()
    df = df.groupby(group_cols, observed=True).value.describe(percentiles=[0.025,0.975]).reset_index()

    df = df.rename(columns={'mean': 'Mean',
                            '2.5%': '95% UI Lower',
                            '97.5%': '95% UI Upper'})

    # Reorder the columns in df
    column_order = group_cols + ['Mean', '95% UI Lower', '95% UI Upper']
    df = df[column_order]

    return df

In [36]:
medication_final = dataframe_beutification_and_summarizing(medication, 'Medication Counts')

In [37]:
medication_final.loc[(medication_final['Year ID'] == 2060) & (medication_final['Age'] == '80_to_84') & (medication_final['Sex'] == 'Female') & (medication_final['Metric'] == 'Number')]

## Haven't really validated these but look reasonable? I had to bump up the age group to get non-zero values for medication completion. We should confirm what qualifies as "completion" in the model. 

Unnamed: 0,Year ID,Location,Age,Sex,Disease Stage,Scenario,Measure,Metric,Mean,95% UI Lower,95% UI Upper
38196,2060,Brazil,80_to_84,Female,Preclinical AD,Reference,Medication Completion Counts,Number,0.0,0.0,0.0
38198,2060,Brazil,80_to_84,Female,Preclinical AD,Reference,Medication Discontinuation Counts,Number,0.0,0.0,0.0
38200,2060,Brazil,80_to_84,Female,Preclinical AD,Reference,Medication Initiation Counts,Number,0.0,0.0,0.0
38202,2060,Brazil,80_to_84,Female,Preclinical AD,BBBM Testing Only,Medication Completion Counts,Number,0.0,0.0,0.0
38204,2060,Brazil,80_to_84,Female,Preclinical AD,BBBM Testing Only,Medication Discontinuation Counts,Number,0.0,0.0,0.0
38206,2060,Brazil,80_to_84,Female,Preclinical AD,BBBM Testing Only,Medication Initiation Counts,Number,0.0,0.0,0.0
38208,2060,Brazil,80_to_84,Female,Preclinical AD,BBBM Testing and Treatment,Medication Completion Counts,Number,971.223404,331.542126,1661.611125
38210,2060,Brazil,80_to_84,Female,Preclinical AD,BBBM Testing and Treatment,Medication Discontinuation Counts,Number,109.213877,44.855699,156.019824
38212,2060,Brazil,80_to_84,Female,Preclinical AD,BBBM Testing and Treatment,Medication Initiation Counts,Number,1080.437281,376.397825,1811.780205
38736,2060,United States of America,80_to_84,Female,Preclinical AD,Reference,Medication Completion Counts,Number,0.0,0.0,0.0


In [40]:
971.223404 + 109.213877	

1080.437281

In [41]:
medication_final.dtypes

Year ID             int64
Location           object
Age                object
Sex                object
Disease Stage      object
Scenario         category
Measure          category
Metric             object
Mean              float64
95% UI Lower      float64
95% UI Upper      float64
dtype: object

In [39]:
medication_final.loc[(medication_final['Year ID'] == 2060) & (medication_final['Age'] == '75_to_79') & (medication_final['Sex'] == 'Female') & (medication_final['Metric'] == 'Number')]

Unnamed: 0,Year ID,Location,Age,Sex,Disease Stage,Scenario,Measure,Metric,Mean,95% UI Lower,95% UI Upper
38160,2060,Brazil,75_to_79,Female,Preclinical AD,Reference,Medication Completion Counts,Number,0.0,0.0,0.0
38162,2060,Brazil,75_to_79,Female,Preclinical AD,Reference,Medication Discontinuation Counts,Number,0.0,0.0,0.0
38164,2060,Brazil,75_to_79,Female,Preclinical AD,Reference,Medication Initiation Counts,Number,0.0,0.0,0.0
38166,2060,Brazil,75_to_79,Female,Preclinical AD,BBBM Testing Only,Medication Completion Counts,Number,0.0,0.0,0.0
38168,2060,Brazil,75_to_79,Female,Preclinical AD,BBBM Testing Only,Medication Discontinuation Counts,Number,0.0,0.0,0.0
38170,2060,Brazil,75_to_79,Female,Preclinical AD,BBBM Testing Only,Medication Initiation Counts,Number,0.0,0.0,0.0
38172,2060,Brazil,75_to_79,Female,Preclinical AD,BBBM Testing and Treatment,Medication Completion Counts,Number,10558.641584,7658.623107,12360.67055
38174,2060,Brazil,75_to_79,Female,Preclinical AD,BBBM Testing and Treatment,Medication Discontinuation Counts,Number,1033.631333,743.044411,1291.064043
38176,2060,Brazil,75_to_79,Female,Preclinical AD,BBBM Testing and Treatment,Medication Initiation Counts,Number,11592.272917,8413.369005,13651.734593
38700,2060,United States of America,75_to_79,Female,Preclinical AD,Reference,Medication Completion Counts,Number,0.0,0.0,0.0


In [31]:
# FIXME: PermissionError
# medication_final.to_csv('/ihme/homes/lutzes/vivarium_research_alzheimers/2025_10_28_medication_final.csv')

In [33]:
__name__

'__main__'

In [42]:
!pwd

/mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers


In [44]:
Path().cwd()

PosixPath('/mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers')