In [None]:
import pandas as pd
import numpy as np, os
import matplotlib.pyplot as plt

from pathlib import Path
import yaml
import re
import math

import gbd_mapping as gm
from vivarium import Artifact

from db_queries import get_ids, get_outputs, get_population, get_covariate_estimates
from get_draws.api import get_draws

import vivarium_helpers as vh
import vivarium_helpers.id_helper as idh
from vivarium_helpers.vph_output.operations import VPHOperator, summarize_draws
from vivarium_helpers.utils import aggregate_mean_lower_upper, convert_to_categorical, print_memory_usage

!date
!whoami
!pwd

Sun Nov  2 14:51:11 PST 2025
ndbs
/mnt/share/code/ndbs/vivarium_research_alzheimers/scratch


In [9]:
# Project directory
project_dir = '/mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers/'

# For testing: Run directory containing model 8.3 results for all
# locations
model_run_subdir = 'results/abie_consistent_model_test/united_states_of_america/2025_10_28_08_55_05/'

# Results directory for model 8.3, for testing
results_dir = Path(project_dir) / model_run_subdir / 'results/'

# Artifact for models 8.3 and 8.4
artifact_model_number = '8.3'

print(results_dir.exists())
results_dir

True


PosixPath('/mnt/team/simulation_science/pub/models/vivarium_csu_alzheimers/results/abie_consistent_model_test/united_states_of_america/2025_10_28_08_55_05/results')

In [2]:
!ls ..

2025_08_03a_alz_dw_explore.ipynb
2025_09_19a_alz_sim_distributions.ipynb
2025_10_28_csf_pet_tests_final.csv
2025_10_28_dalys_final.csv
2025_10_28_deaths_final.csv
2025_10_28_incidence_final.csv
2025_10_28_medication_final.csv
2025_10_28_prevalence_final.csv
2025_10_28_results_dataframe_health.ipynb
2025_10_28_results_dataframe_test_treat.ipynb
README.md
data_prep
generate_and_test_consistent_rates.ipynb
results_tables
scratch
verification_and_validation


In [3]:
incidence = pd.read_csv('../2025_10_28_incidence_final.csv')
incidence

Unnamed: 0.1,Unnamed: 0,Year ID,Location,Age,Sex,Disease Stage,Scenario,Measure,Metric,Mean,95% UI Lower,95% UI Upper
0,0,2025,Brazil,25_to_29,Female,MCI due to AD,Reference,Incident cases,Number,0.0,0.0,0.0
1,1,2025,Brazil,25_to_29,Female,MCI due to AD,Reference,Incident cases,"Rate per 100,000",0.0,0.0,0.0
2,2,2025,Brazil,25_to_29,Female,MCI due to AD,BBBM Testing Only,Incident cases,Number,0.0,0.0,0.0
3,3,2025,Brazil,25_to_29,Female,MCI due to AD,BBBM Testing Only,Incident cases,"Rate per 100,000",0.0,0.0,0.0
4,4,2025,Brazil,25_to_29,Female,MCI due to AD,BBBM Testing and Treatment,Incident cases,Number,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
38875,38875,2060,United States of America,95_plus,Male,Preclinical AD,Reference,Incident cases,"Rate per 100,000",0.0,0.0,0.0
38876,38876,2060,United States of America,95_plus,Male,Preclinical AD,BBBM Testing Only,Incident cases,Number,0.0,0.0,0.0
38877,38877,2060,United States of America,95_plus,Male,Preclinical AD,BBBM Testing Only,Incident cases,"Rate per 100,000",0.0,0.0,0.0
38878,38878,2060,United States of America,95_plus,Male,Preclinical AD,BBBM Testing and Treatment,Incident cases,Number,0.0,0.0,0.0


In [4]:
incidence['Disease Stage'].unique()

array(['MCI due to AD', 'AD Dementia', 'Preclinical AD'], dtype=object)

In [5]:
incidence['Location'].unique()

array(['Brazil', 'United States of America'], dtype=object)

# Let's count how many rows we should have per measure

Columns are:

* location (10 locations)
* year (2025 - 2100)
* age group (25-29, ..., 90-94, 95+)
* sex (male, female, both)
* disease stage (preclinical, MCI, dementia)
* scenario (3 scenarios)
* metric (number, rate)
* measure (lots of measures)


If all strata are included, the total number of rows _per measure_ is
615,600. So, large, but not insane.


In [6]:
strata_counts = {
    'location': 10,
    'year': len(range(2025, 2101)),
    'age_group': len(range(25, 95, 5)) + 1, # add 1 for 95+
    'sex': 3,
    'disease_stage': 3,
    'scenario': 3,
    'metric': 2
}
strata_counts

{'location': 10,
 'year': 76,
 'age_group': 15,
 'sex': 3,
 'disease_stage': 3,
 'scenario': 3,
 'metric': 2}

In [7]:
math.prod(strata_counts.values())

615600

In [6]:
strata_counts_2 = {
    'location': 10,
    'year': len(range(2022, 2101)),
    'age_group': len(range(25, 95, 5)) + 1, # add 1 for 95+
    'sex': 2,
    'disease_stage': 3,
    'scenario': 3,
    'metric': 1
}
strata_counts_2

{'location': 10,
 'year': 79,
 'age_group': 15,
 'sex': 2,
 'disease_stage': 3,
 'scenario': 3,
 'metric': 1}

In [8]:
math.prod(strata_counts_2.values())

213300

# Test renaming things with Categoricals

In [8]:
rng = np.random.default_rng(555666777)
n = 10
df = pd.DataFrame(
    {
        'species': rng.choice(['yak', 'smurf'], n),
        'sex': rng.choice(['male', 'female'], n),
    }
).astype('category')
df

Unnamed: 0,species,sex
0,smurf,male
1,smurf,male
2,smurf,female
3,yak,male
4,yak,male
5,yak,female
6,yak,female
7,smurf,male
8,yak,male
9,yak,male


In [9]:
df['species']

0    smurf
1    smurf
2    smurf
3      yak
4      yak
5      yak
6      yak
7    smurf
8      yak
9      yak
Name: species, dtype: category
Categories (2, object): ['smurf', 'yak']

In [10]:
df2 = df.replace({'yak': 'squid'})
df2

Unnamed: 0,species,sex
0,smurf,male
1,smurf,male
2,smurf,female
3,squid,male
4,squid,male
5,squid,female
6,squid,female
7,smurf,male
8,squid,male
9,squid,male


In [11]:
df2['species']

0    smurf
1    smurf
2    smurf
3    squid
4    squid
5    squid
6    squid
7    smurf
8    squid
9    squid
Name: species, dtype: category
Categories (2, object): ['smurf', 'squid']

In [13]:
df['species'].replace({'smurf': 'goat'})

0    goat
1    goat
2    goat
3     yak
4     yak
5     yak
6     yak
7    goat
8     yak
9     yak
Name: species, dtype: category
Categories (2, object): ['goat', 'yak']

In [15]:
df3 = df.replace({'species': {'smurf': 'orc', 'yak': 'orc'}})
df3['species']

0    orc
1    orc
2    orc
3    orc
4    orc
5    orc
6    orc
7    orc
8    orc
9    orc
Name: species, dtype: category
Categories (1, object): ['orc']

# Test aggregation

In [10]:
!ls $results_dir

counts_baseline_tests_among_eligible.parquet
counts_bbbm_tests.parquet
counts_new_simulants.parquet
counts_newly_eligible_for_bbbm_testing.parquet
deaths.parquet
person_time_alzheimers_disease_and_other_dementias.parquet
person_time_eligible_for_bbbm_testing.parquet
person_time_ever_eligible_for_bbbm_testing.parquet
person_time_treatment.parquet
transition_count_alzheimers_disease_and_other_dementias.parquet
transition_count_treatment.parquet
ylds.parquet
ylls.parquet


In [31]:
ops = VPHOperator(location_col=True)

deaths_filters = [
    ('entity', '=', 'alzheimers_disease_state'),
    ('event_year', '>=', '2040'),
    ('event_year', '<', '2050'),
    ('age_group', '>=', '60_to_64'),
    ('age_group', '<', '80_to_84'),
]
deaths = (
    pd.read_parquet(results_dir / 'deaths.parquet', filters=deaths_filters)
    .drop(columns='input_draw_number')
    .pipe(convert_to_categorical)
    .pipe(ops.marginalize, 'random_seed')
)
print_memory_usage(deaths)
deaths

0.2244 MB 


Unnamed: 0,age_group,artifact_path,entity,entity_type,event_year,input_draw,measure,scenario,sex,sub_entity,value
0,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,392,deaths,baseline,Female,alzheimers_disease_state,22.0
1,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,392,deaths,baseline,Male,alzheimers_disease_state,13.0
2,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,392,deaths,bbbm_testing,Female,alzheimers_disease_state,22.0
3,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,392,deaths,bbbm_testing,Male,alzheimers_disease_state,13.0
4,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,392,deaths,bbbm_testing_and_treatment,Female,alzheimers_disease_state,22.0
...,...,...,...,...,...,...,...,...,...,...,...
11995,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,346,deaths,baseline,Male,alzheimers_disease_state,743.0
11996,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,346,deaths,bbbm_testing,Female,alzheimers_disease_state,1711.0
11997,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,346,deaths,bbbm_testing,Male,alzheimers_disease_state,743.0
11998,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,346,deaths,bbbm_testing_and_treatment,Female,alzheimers_disease_state,1694.0


In [26]:
deaths.random_seed.unique()

[6810, 2284, 5616, 2787, 4344]
Categories (5, int64): [2284, 2787, 4344, 5616, 6810]

In [24]:
deaths.event_year.unique()

['2040', '2041', '2042', '2043', '2044', ..., '2056', '2057', '2058', '2059', '2060']
Length: 21
Categories (36, object): ['2025', '2026', '2027', '2028', ..., '2057', '2058', '2059', '2060']

In [36]:
deaths_summary1 = ops.summarize_draws(deaths).reset_index()
deaths_summary1

Unnamed: 0,age_group,artifact_path,entity,entity_type,event_year,measure,scenario,sex,sub_entity,mean,lower,upper
0,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,baseline,Female,alzheimers_disease_state,29.0,22.7,32.0
1,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,baseline,Male,alzheimers_disease_state,13.0,7.4,21.1
2,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,bbbm_testing,Female,alzheimers_disease_state,29.0,22.7,32.0
3,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,bbbm_testing,Male,alzheimers_disease_state,13.0,7.4,21.1
4,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,bbbm_testing_and_treatment,Female,alzheimers_disease_state,29.0,22.7,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2395,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,deaths,baseline,Male,alzheimers_disease_state,591.4,497.0,733.1
2396,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,deaths,bbbm_testing,Female,alzheimers_disease_state,1598.2,1421.2,1823.5
2397,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,deaths,bbbm_testing,Male,alzheimers_disease_state,591.4,497.0,733.1
2398,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,deaths,bbbm_testing_and_treatment,Female,alzheimers_disease_state,1574.8,1408.6,1801.1


In [38]:
deaths_summary1 = ops.summarize_draws(deaths, 'mean').reset_index()
deaths_summary1

Unnamed: 0,age_group,artifact_path,entity,entity_type,event_year,measure,scenario,sex,sub_entity,value
0,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,baseline,Female,alzheimers_disease_state,29.0
1,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,baseline,Male,alzheimers_disease_state,13.0
2,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,bbbm_testing,Female,alzheimers_disease_state,29.0
3,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,bbbm_testing,Male,alzheimers_disease_state,13.0
4,60_to_64,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2040,deaths,bbbm_testing_and_treatment,Female,alzheimers_disease_state,29.0
...,...,...,...,...,...,...,...,...,...,...
2395,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,deaths,baseline,Male,alzheimers_disease_state,591.4
2396,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,deaths,bbbm_testing,Female,alzheimers_disease_state,1598.2
2397,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,deaths,bbbm_testing,Male,alzheimers_disease_state,591.4
2398,75_to_79,/mnt/team/simulation_science/pub/models/vivari...,alzheimers_disease_state,cause,2049,deaths,bbbm_testing_and_treatment,Female,alzheimers_disease_state,1574.8
