In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 8)

from scipy import stats
import collections

import warnings
# warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from pathlib import Path

import db_queries as db
import vivarium_helpers.id_helper as idh
import gbd_mapping
from vivarium import Artifact

# Add the repo directory vivarium_research_ciff_sam/ to sys.path so that the
# model_validation imports in this cell can be resolved.
import os, sys
repo_path = os.path.abspath('../..')
# Guard the append so re-running this cell does not pile up duplicate entries.
if repo_path not in sys.path:
    sys.path.append(repo_path)
# Assumes vivarium_research_ciff_sam/ is in sys.path
# import model_validation.vivarium_transformed_output as vto
# import model_validation.vivarium_raw_output as vro
import model_validation.vivarium_output_processing as vp
import model_validation.ciff_sam_results as csr
import model_validation.ciff_sam_plots as csp

%load_ext autoreload
%autoreload 2

!pwd
!whoami
!date

/ihme/homes/ndbs/vivarium_research_ciff_sam/nathaniel/scratch
ndbs
Tue Oct 12 13:54:15 PDT 2021


# Test `get_prevalence` against `get_sqlns_coverage`

They match except for the numerator measures (state person time vs. person time), because I used the `wasting_state_person_time` table in `get_sqlns_coverage` vs. the `person_time` table in `get_prevalence`.

In [8]:
# Build a VivariumResults object for model spec '4.1' and list the names of
# the tables it exposes.
results = csr.VivariumResults.cleaned_from_model_spec('4.1')
results.table_names()

['wasting_transition_count',
 'wasting_state_person_time',
 'deaths',
 'stunting_state_person_time',
 'population',
 'ylls',
 'ylds',
 'person_time',
 'cause_state_person_time',
 'cause_transition_count']

In [26]:
# Compute SQ-LNS coverage two ways, both stratified by age and year: once with
# the dedicated helper and once with the generic get_prevalence.
strata = ['age', 'year']
sqlns_prev1 = csr.get_sqlns_coverage(results, strata)
sqlns_prev2 = csr.get_prevalence(results, 'sq_lns', strata)
# Compare the two tables with the `numerator_measure` column left out of both.
(
    sqlns_prev1
    .drop(columns='numerator_measure')
    .equals(sqlns_prev2.drop(columns='numerator_measure'))
)

False

In [10]:
sqlns_prev1

Unnamed: 0,age,year,input_draw,scenario,sq_lns,value,numerator_measure,denominator_measure,multiplier
0,1-5_months,2022,29,baseline,covered,0.0,state_person_time,person_time,1
1,1-5_months,2022,29,baseline,uncovered,1.0,state_person_time,person_time,1
2,1-5_months,2022,29,sqlns,covered,0.0,state_person_time,person_time,1
3,1-5_months,2022,29,sqlns,uncovered,1.0,state_person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
2156,late_neonatal,2026,946,sqlns,covered,0.0,state_person_time,person_time,1
2157,late_neonatal,2026,946,sqlns,uncovered,1.0,state_person_time,person_time,1
2158,late_neonatal,2026,946,wasting_treatment,covered,0.0,state_person_time,person_time,1
2159,late_neonatal,2026,946,wasting_treatment,uncovered,1.0,state_person_time,person_time,1


In [11]:
sqlns_prev2

Unnamed: 0,age,year,input_draw,scenario,sq_lns,value,numerator_measure,denominator_measure,multiplier
0,1-5_months,2022,29,baseline,covered,0.0,person_time,person_time,1
1,1-5_months,2022,29,baseline,uncovered,1.0,person_time,person_time,1
2,1-5_months,2022,29,sqlns,covered,0.0,person_time,person_time,1
3,1-5_months,2022,29,sqlns,uncovered,1.0,person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
2156,late_neonatal,2026,946,sqlns,covered,0.0,person_time,person_time,1
2157,late_neonatal,2026,946,sqlns,uncovered,1.0,person_time,person_time,1
2158,late_neonatal,2026,946,wasting_treatment,covered,0.0,person_time,person_time,1
2159,late_neonatal,2026,946,wasting_treatment,uncovered,1.0,person_time,person_time,1


## `vp.assert_values_equal` passes after dropping `numerator_measure`

In [27]:
# Compare the two coverage tables with `numerator_measure` removed from each;
# the call produces no output when it succeeds.
vp.assert_values_equal(
    sqlns_prev1.drop(columns='numerator_measure'),
    sqlns_prev2.drop(columns='numerator_measure'),
)

In [28]:
# Show the `value` columns of both tables side by side (as self / other),
# again with `numerator_measure` removed from each.
vp.compare_values(
    sqlns_prev1.drop(columns='numerator_measure'),
    sqlns_prev2.drop(columns='numerator_measure'),
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,self,other
age,denominator_measure,input_draw,multiplier,scenario,sq_lns,year,Unnamed: 7_level_2,Unnamed: 8_level_2
1-5_months,person_time,232,1,wasting_treatment,uncovered,2022,1.0,1.0
1-5_months,person_time,394,1,wasting_treatment,uncovered,2022,1.0,1.0
1-5_months,person_time,524,1,baseline,uncovered,2022,1.0,1.0
1-5_months,person_time,524,1,wasting_treatment,uncovered,2022,1.0,1.0
...,...,...,...,...,...,...,...,...
late_neonatal,person_time,524,1,sqlns,uncovered,2026,1.0,1.0
late_neonatal,person_time,524,1,wasting_treatment,uncovered,2026,1.0,1.0
late_neonatal,person_time,602,1,baseline,uncovered,2026,1.0,1.0
late_neonatal,person_time,650,1,baseline,uncovered,2026,1.0,1.0


## Figure out why assert was failing at first

Reindexing was giving all NaNs because the `numerator_measure` index columns did not match.

In [19]:
# As the output shows, vp.value returns the table with every non-value column
# (including `numerator_measure`) as an index level and a single `value` column.
df1 = vp.value(sqlns_prev1)
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,value
age,denominator_measure,input_draw,multiplier,numerator_measure,scenario,sq_lns,year,Unnamed: 8_level_1
1-5_months,person_time,29,1,state_person_time,baseline,covered,2022,0.0
1-5_months,person_time,29,1,state_person_time,baseline,uncovered,2022,1.0
1-5_months,person_time,29,1,state_person_time,sqlns,covered,2022,0.0
1-5_months,person_time,29,1,state_person_time,sqlns,uncovered,2022,1.0
...,...,...,...,...,...,...,...,...
late_neonatal,person_time,946,1,state_person_time,sqlns,covered,2026,0.0
late_neonatal,person_time,946,1,state_person_time,sqlns,uncovered,2026,1.0
late_neonatal,person_time,946,1,state_person_time,wasting_treatment,covered,2026,0.0
late_neonatal,person_time,946,1,state_person_time,wasting_treatment,uncovered,2026,1.0


In [22]:
# Same transformation for the get_prevalence result; here the
# `numerator_measure` index level is 'person_time' instead of 'state_person_time'.
df2 = vp.value(sqlns_prev2)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,value
age,denominator_measure,input_draw,multiplier,numerator_measure,scenario,sq_lns,year,Unnamed: 8_level_1
1-5_months,person_time,29,1,person_time,baseline,covered,2022,0.0
1-5_months,person_time,29,1,person_time,baseline,uncovered,2022,1.0
1-5_months,person_time,29,1,person_time,sqlns,covered,2022,0.0
1-5_months,person_time,29,1,person_time,sqlns,uncovered,2022,1.0
...,...,...,...,...,...,...,...,...
late_neonatal,person_time,946,1,person_time,sqlns,covered,2026,0.0
late_neonatal,person_time,946,1,person_time,sqlns,uncovered,2026,1.0
late_neonatal,person_time,946,1,person_time,wasting_treatment,covered,2026,0.0
late_neonatal,person_time,946,1,person_time,wasting_treatment,uncovered,2026,1.0


In [23]:
# Reindex df1 on df2's index. Every label differs in the `numerator_measure`
# level, so no rows align and each value comes back NaN.
df1.reindex(df2.index)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,value
age,denominator_measure,input_draw,multiplier,numerator_measure,scenario,sq_lns,year,Unnamed: 8_level_1
1-5_months,person_time,29,1,person_time,baseline,covered,2022,
1-5_months,person_time,29,1,person_time,baseline,uncovered,2022,
1-5_months,person_time,29,1,person_time,sqlns,covered,2022,
1-5_months,person_time,29,1,person_time,sqlns,uncovered,2022,
...,...,...,...,...,...,...,...,...
late_neonatal,person_time,946,1,person_time,sqlns,covered,2026,
late_neonatal,person_time,946,1,person_time,sqlns,uncovered,2026,
late_neonatal,person_time,946,1,person_time,wasting_treatment,covered,2026,
late_neonatal,person_time,946,1,person_time,wasting_treatment,uncovered,2026,


In [24]:
df1.index

MultiIndex([(   '1-5_months', 'person_time',  29, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time', 223, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time', 223, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time', 223, 1, 'state_person_time', ...),
            (   '1-5_months', 'person_time', 223, 1, 'state_person_time', ...),
            ...
            ('late_neonatal', 'person_time', 829, 1, 'state_person_time', ...),
            ('late_neonatal', 'person_time', 829, 1, 'state_person_time', ...),
            ('late_neona

In [25]:
df2.index

MultiIndex([(   '1-5_months', 'person_time',  29, 1, 'person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'person_time', ...),
            (   '1-5_months', 'person_time',  29, 1, 'person_time', ...),
            (   '1-5_months', 'person_time', 223, 1, 'person_time', ...),
            (   '1-5_months', 'person_time', 223, 1, 'person_time', ...),
            (   '1-5_months', 'person_time', 223, 1, 'person_time', ...),
            (   '1-5_months', 'person_time', 223, 1, 'person_time', ...),
            ...
            ('late_neonatal', 'person_time', 829, 1, 'person_time', ...),
            ('late_neonatal', 'person_time', 829, 1, 'person_time', ...),
            ('late_neonatal', 'person_time', 829, 1, 'person_time', ...),
            ('late_neo

## It looks like most values are 0 and 1...

In [29]:
df1.describe()

Unnamed: 0,value
count,2160.0
mean,0.5
std,0.48797
min,0.0
25%,0.0
50%,0.5
75%,1.0
max,1.0


## Check that we actually get something besides 1s and 0s when we filter to nonzero coverage

In [32]:
# Restrict to the 2_to_4 age group in the sqlns scenario after 2022.
# `year` is compared against the string '2022', so the column presumably holds
# strings — TODO confirm.
sqlns_prev1.query("age == '2_to_4' and scenario=='sqlns' and year>'2022'")

Unnamed: 0,age,year,input_draw,scenario,sq_lns,value,numerator_measure,denominator_measure,multiplier
794,2_to_4,2023,29,sqlns,covered,0.900364,state_person_time,person_time,1
795,2_to_4,2023,29,sqlns,uncovered,0.099636,state_person_time,person_time,1
800,2_to_4,2023,223,sqlns,covered,0.900166,state_person_time,person_time,1
801,2_to_4,2023,223,sqlns,uncovered,0.099834,state_person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
1070,2_to_4,2026,829,sqlns,covered,0.899814,state_person_time,person_time,1
1071,2_to_4,2026,829,sqlns,uncovered,0.100186,state_person_time,person_time,1
1076,2_to_4,2026,946,sqlns,covered,0.899756,state_person_time,person_time,1
1077,2_to_4,2026,946,sqlns,uncovered,0.100244,state_person_time,person_time,1


In [33]:
# Apply the identical filter to the get_prevalence result for a side-by-side
# comparison (2_to_4 age group, sqlns scenario, years after 2022).
sqlns_prev2.query("age == '2_to_4' and scenario=='sqlns' and year>'2022'")

Unnamed: 0,age,year,input_draw,scenario,sq_lns,value,numerator_measure,denominator_measure,multiplier
794,2_to_4,2023,29,sqlns,covered,0.900364,person_time,person_time,1
795,2_to_4,2023,29,sqlns,uncovered,0.099636,person_time,person_time,1
800,2_to_4,2023,223,sqlns,covered,0.900166,person_time,person_time,1
801,2_to_4,2023,223,sqlns,uncovered,0.099834,person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
1070,2_to_4,2026,829,sqlns,covered,0.899814,person_time,person_time,1
1071,2_to_4,2026,829,sqlns,uncovered,0.100186,person_time,person_time,1
1076,2_to_4,2026,946,sqlns,covered,0.899756,person_time,person_time,1
1077,2_to_4,2026,946,sqlns,uncovered,0.100244,person_time,person_time,1


# Test `get_prevalence` against `get_x_factor_prevalence`

They match, and in fact they're exactly equal because I computed them in the same way.

In [44]:
# Build a VivariumResults object for model spec '4.5.2' and list the names of
# the tables it exposes.
results45 = csr.VivariumResults.cleaned_from_model_spec('4.5.2')
results45.table_names()

['wasting_transition_count',
 'wasting_state_person_time',
 'deaths',
 'stunting_state_person_time',
 'population',
 'ylls',
 'ylds',
 'person_time',
 'cause_state_person_time',
 'cause_transition_count']

In [45]:
# Compute x-factor prevalence two ways, both stratified by age and year: once
# with the dedicated helper and once with the generic get_prevalence.
strata = ['age', 'year']
xfactor_prev1 = csr.get_x_factor_prevalence(results45, strata)
xfactor_prev2 = csr.get_prevalence(results45, 'x_factor', strata)
# No column is dropped here: the two tables are compared in full.
xfactor_prev1.equals(xfactor_prev2)

True

In [46]:
xfactor_prev1

Unnamed: 0,age,year,input_draw,scenario,x_factor,value,numerator_measure,denominator_measure,multiplier
0,1-5_months,2022,29,baseline,cat1,0.192696,person_time,person_time,1
1,1-5_months,2022,29,baseline,cat2,0.807304,person_time,person_time,1
2,1-5_months,2022,29,sqlns,cat1,0.192696,person_time,person_time,1
3,1-5_months,2022,29,sqlns,cat2,0.807304,person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
2156,late_neonatal,2026,946,sqlns,cat1,0.184549,person_time,person_time,1
2157,late_neonatal,2026,946,sqlns,cat2,0.815451,person_time,person_time,1
2158,late_neonatal,2026,946,wasting_treatment,cat1,0.184549,person_time,person_time,1
2159,late_neonatal,2026,946,wasting_treatment,cat2,0.815451,person_time,person_time,1


In [47]:
xfactor_prev2

Unnamed: 0,age,year,input_draw,scenario,x_factor,value,numerator_measure,denominator_measure,multiplier
0,1-5_months,2022,29,baseline,cat1,0.192696,person_time,person_time,1
1,1-5_months,2022,29,baseline,cat2,0.807304,person_time,person_time,1
2,1-5_months,2022,29,sqlns,cat1,0.192696,person_time,person_time,1
3,1-5_months,2022,29,sqlns,cat2,0.807304,person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
2156,late_neonatal,2026,946,sqlns,cat1,0.184549,person_time,person_time,1
2157,late_neonatal,2026,946,sqlns,cat2,0.815451,person_time,person_time,1
2158,late_neonatal,2026,946,wasting_treatment,cat1,0.184549,person_time,person_time,1
2159,late_neonatal,2026,946,wasting_treatment,cat2,0.815451,person_time,person_time,1


# Check that new `get_prevalence` still works with risks and causes

In [52]:
# Prevalence of each wasting state, stratified by age and year, from `results`.
wasting_prev = csr.get_prevalence(results, 'wasting_state', ['age', 'year'])
wasting_prev

Unnamed: 0,age,year,input_draw,scenario,wasting_state,value,numerator_measure,denominator_measure,multiplier
0,1-5_months,2022,29,baseline,mild_child_wasting,0.205336,state_person_time,person_time,1
1,1-5_months,2022,29,baseline,moderate_acute_malnutrition,0.088382,state_person_time,person_time,1
2,1-5_months,2022,29,baseline,severe_acute_malnutrition,0.029572,state_person_time,person_time,1
3,1-5_months,2022,29,baseline,susceptible_to_child_wasting,0.676710,state_person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
4316,late_neonatal,2026,946,wasting_treatment,mild_child_wasting,0.200809,state_person_time,person_time,1
4317,late_neonatal,2026,946,wasting_treatment,moderate_acute_malnutrition,0.089108,state_person_time,person_time,1
4318,late_neonatal,2026,946,wasting_treatment,severe_acute_malnutrition,0.032706,state_person_time,person_time,1
4319,late_neonatal,2026,946,wasting_treatment,susceptible_to_child_wasting,0.677378,state_person_time,person_time,1


In [56]:
# Sanity check: the wasting-state prevalences should total 1 in every stratum.
# Presumably vp.marginalize sums `value` over 'wasting_state' — TODO confirm.
np.allclose(vp.marginalize(wasting_prev, 'wasting_state').value, 1)

True

In [58]:
# Prevalence of each stunting category, stratified by age, sex, and year.
stunting_prev = csr.get_prevalence(results, 'stunting_state', ['age', 'sex', 'year'])
stunting_prev

Unnamed: 0,age,sex,year,input_draw,scenario,stunting_state,value,numerator_measure,denominator_measure,multiplier
0,1-5_months,female,2022,29,baseline,cat1,0.040926,state_person_time,person_time,1
1,1-5_months,female,2022,29,baseline,cat2,0.110236,state_person_time,person_time,1
2,1-5_months,female,2022,29,baseline,cat3,0.202617,state_person_time,person_time,1
3,1-5_months,female,2022,29,baseline,cat4,0.646220,state_person_time,person_time,1
...,...,...,...,...,...,...,...,...,...,...
8636,late_neonatal,male,2026,946,wasting_treatment,cat1,0.000000,state_person_time,person_time,1
8637,late_neonatal,male,2026,946,wasting_treatment,cat2,0.000000,state_person_time,person_time,1
8638,late_neonatal,male,2026,946,wasting_treatment,cat3,0.000000,state_person_time,person_time,1
8639,late_neonatal,male,2026,946,wasting_treatment,cat4,1.000000,state_person_time,person_time,1


In [61]:
# Sanity check: the stunting-category prevalences should total 1 in every stratum.
# Presumably vp.marginalize sums `value` over 'stunting_state' — TODO confirm.
np.allclose(vp.marginalize(stunting_prev, 'stunting_state').value, 1)

True

In [63]:
# Prevalence of each cause state by year. Note that the stratum is passed as a
# bare string here rather than as a list.
cause_prev = csr.get_prevalence(results, 'cause_state', 'year')
cause_prev

Unnamed: 0,year,input_draw,scenario,cause_state,value,numerator_measure,denominator_measure,multiplier
0,2022,29,baseline,diarrheal_diseases,0.027868,state_person_time,person_time,1
1,2022,29,baseline,lower_respiratory_infections,0.001924,state_person_time,person_time,1
2,2022,29,baseline,measles,0.000291,state_person_time,person_time,1
3,2022,29,baseline,susceptible_to_diarrheal_diseases,0.972132,state_person_time,person_time,1
...,...,...,...,...,...,...,...,...
1076,2026,946,wasting_treatment,measles,0.000152,state_person_time,person_time,1
1077,2026,946,wasting_treatment,susceptible_to_diarrheal_diseases,0.976326,state_person_time,person_time,1
1078,2026,946,wasting_treatment,susceptible_to_lower_respiratory_infections,0.997947,state_person_time,person_time,1
1079,2026,946,wasting_treatment,susceptible_to_measles,0.999848,state_person_time,person_time,1


In [65]:
# The cause_state column covers three causes (diarrheal diseases, lower
# respiratory infections, measles), each with a with-condition state and a
# susceptible_to_* state, so the prevalences are expected to total 3, not 1.
# Presumably vp.marginalize sums `value` over 'cause_state' — TODO confirm.
np.allclose(vp.marginalize(cause_prev, 'cause_state').value, 3)

True

# Test `Path.rglob` to find `output.hdf` files

## It's really slow... I don't understand why because this directory subtree is not very big

I should just try writing my own recursive search routine and see how that works...

In [34]:
# Look up the count_data directory for model 4.1.
# NOTE(review): the version is passed as the float 4.1 here, but as the string
# '4.1' to cleaned_from_model_spec earlier — confirm which type is expected.
csr.get_count_data_path(4.1)

'/ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment/ciff_sam/2021_09_24_16_36_30/count_data/'

In [36]:
!ls /ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment/ciff_sam

2021_09_22_12_12_41  2021_09_23_10_08_40
2021_09_22_17_40_59  2021_09_24_16_36_30


In [37]:
path41 = Path('/ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment')
path41

PosixPath('/ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment')

In [48]:
# rglob is lazy: this only creates a generator object; the directory tree is
# not searched until the generator is consumed.
result = path41.rglob('output.hdf')
result

<generator object Path.rglob at 0x2b494b7b8200>

In [49]:
%time list(result)

CPU times: user 1.28 s, sys: 4.6 s, total: 5.88 s
Wall time: 1min 6s


[PosixPath('/ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment/ciff_sam/2021_09_23_10_08_40/output.hdf'),
 PosixPath('/ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment/ciff_sam/2021_09_24_16_36_30/output.hdf'),
 PosixPath('/ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment/ciff_sam/2021_09_22_17_40_59/output.hdf'),
 PosixPath('/ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment/ciff_sam/2021_09_22_12_12_41/output.hdf')]

In [50]:
# Repeat the timing for entries named 'count_data'. The generator is created
# and consumed in the same cell, so the cell can be re-run safely.
result = path41.rglob('count_data')
%time list(result)

CPU times: user 796 ms, sys: 4.25 s, total: 5.05 s
Wall time: 56.7 s


[PosixPath('/ihme/costeffectiveness/results/vivarium_ciff_sam/v4.1_wasting_treatment/ciff_sam/2021_09_24_16_36_30/count_data')]