In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 8)

from scipy import stats
import collections

import warnings
# warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from pathlib import Path

import db_queries as db
import vivarium_helpers.id_helper as idh
import gbd_mapping
from vivarium import Artifact

# Add the repo directory vivarium_research_ciff_sam/ to sys.path so that the
# model_validation package can be imported below.
import os
import sys
repo_path = os.path.abspath('../..')
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if repo_path not in sys.path:
    sys.path.append(repo_path)
# Assumes vivarium_research_ciff_sam/ is in sys.path
# import model_validation.vivarium_transformed_output as vto
# import model_validation.vivarium_raw_output as vro
import model_validation.vivarium_output_processing as vp
import model_validation.ciff_sam_results as csr
import model_validation.ciff_sam_plots as csp

# Reload imported modules automatically before each cell is executed, so that
# edits to the model_validation modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Record provenance: working directory, user, and date of this run.
!pwd
!whoami
!date

/ihme/homes/ndbs/vivarium_research_ciff_sam/nathaniel/scratch
ndbs
Sat Oct 23 13:59:46 PDT 2021


# Load results and set global index columns

In [2]:
# Build the results object for model spec '4.5.2' (presumably the model
# version -- confirm against `csr`), then list the tables it exposes.
results = csr.VivariumResults.cleaned_from_model_spec('4.5.2')
results.table_names()

['wasting_transition_count',
 'wasting_state_person_time',
 'deaths',
 'stunting_state_person_time',
 'population',
 'ylls',
 'ylds',
 'person_time',
 'cause_state_person_time',
 'cause_transition_count']

In [3]:
# Add 'x_factor_effect' to the global index columns used by `vp`.
# The guard keeps this cell idempotent: without it, every re-run would append
# 'x_factor_effect' to the already-extended list a second time.
if 'x_factor_effect' not in vp.INDEX_COLUMNS:
    vp.set_global_index_columns(vp.INDEX_COLUMNS + ['x_factor_effect'])
vp.INDEX_COLUMNS

['input_draw', 'scenario', 'x_factor_effect']

# Test `find_person_time_tables` function

In [4]:
# Display the wasting-state person-time table to see which columns it has.
results.wasting_state_person_time

Unnamed: 0,sex,year,wasting_state,measure,input_draw,scenario,x_factor_effect,value,x_factor,sq_lns,wasting_treatment,age
0,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat2,covered,covered,early_neonatal
1,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat1,covered,covered,early_neonatal
2,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat2,covered,uncovered,early_neonatal
3,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat1,covered,uncovered,early_neonatal
...,...,...,...,...,...,...,...,...,...,...,...,...
345596,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,16475.270363,cat2,uncovered,covered,2_to_4
345597,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,3419.826146,cat1,uncovered,covered,2_to_4
345598,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,1704.976044,cat2,uncovered,uncovered,2_to_4
345599,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,384.646133,cat1,uncovered,uncovered,2_to_4


In [6]:
# Which person-time tables are found for the 'x_factor' column?
csr.find_person_time_tables(results, 'x_factor')

['wasting_state_person_time', 'person_time']

In [7]:
# Which person-time tables are found for the 'x_factor_effect' column?
csr.find_person_time_tables(results, 'x_factor_effect')

['wasting_state_person_time',
 'stunting_state_person_time',
 'person_time',
 'cause_state_person_time']

In [8]:
# Which person-time tables are found for the 'cause_state' column?
csr.find_person_time_tables(results, 'cause_state')

['cause_state_person_time']

## Change function to a generator

In [9]:
# Same call as before; the function now returns a generator instead of a list.
csr.find_person_time_tables(results, 'x_factor')

<generator object find_person_time_tables.<locals>.<genexpr> at 0x2b40fcbd8970>

In [10]:
# Materialize the generator with an explicit call rather than through `_`
# (IPython's "previous output" variable), which only holds the generator when
# this cell is run immediately after the previous one.
list(csr.find_person_time_tables(results, 'x_factor'))

['wasting_state_person_time', 'person_time']

# Test `get_prevalence` with update to find person-time tables

# Compute prevalence of SQLNS conditional on year, age, and wasting state:

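It worked! Woo hoo! `get_prevalence` finds the right person-time table by itself and reproduces the result of the direct `vp.ratio` computation (see the equality check below).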

In [12]:
# Split the age groups into three bins and turn each bin into a list.
under_6mo, over_6mo, all_ages = (
    list(age_bin)
    for age_bin in csr.get_age_group_bins('6-11_months', 'all_ages')
)
over_6mo

['6-11_months', '12_to_23_months', '2_to_4']

In [14]:
# Restrict to the age groups in `over_6mo` and to the 'sqlns' scenario.
prefilter_query = f"age in {over_6mo} and scenario=='sqlns'"
# Filter the person-time table once and use it as both numerator and
# denominator, instead of running the identical query twice.
wasting_person_time_filtered = results.wasting_state_person_time.query(prefilter_query)
# Ratio of person-time per 'sq_lns' category to total person-time within each
# (year, age, wasting_state) stratum.
sqlns_prev_by_year_age_wasting1 = vp.ratio(
    wasting_person_time_filtered,
    wasting_person_time_filtered,
    strata=['year', 'age', 'wasting_state'],
    numerator_broadcast='sq_lns',
)
sqlns_prev_by_year_age_wasting1

Unnamed: 0,year,age,wasting_state,input_draw,scenario,x_factor_effect,sq_lns,value,numerator_measure,denominator_measure,multiplier
0,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.1,covered,0.000000,state_person_time,state_person_time,1
1,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.1,uncovered,1.000000,state_person_time,state_person_time,1
2,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.2,covered,0.000000,state_person_time,state_person_time,1
3,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.2,uncovered,1.000000,state_person_time,state_person_time,1
...,...,...,...,...,...,...,...,...,...,...,...
7196,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.4,covered,0.895991,state_person_time,state_person_time,1
7197,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.4,uncovered,0.104009,state_person_time,state_person_time,1
7198,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.5,covered,0.895979,state_person_time,state_person_time,1
7199,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.5,uncovered,0.104021,state_person_time,state_person_time,1


In [16]:
# Same quantity as the direct `vp.ratio` computation, but obtained through
# `get_prevalence`, which is not told which person-time table to use.
sqlns_prev_by_year_age_wasting2 = csr.get_prevalence(
    results,
    state_variable='sq_lns',
    strata=['year', 'age', 'wasting_state'],
    prefilter_query=prefilter_query
)
sqlns_prev_by_year_age_wasting2

Unnamed: 0,year,age,wasting_state,input_draw,scenario,x_factor_effect,sq_lns,value,numerator_measure,denominator_measure,multiplier,measure
0,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.1,covered,0.000000,state_person_time,state_person_time,1,prevalence
1,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.1,uncovered,1.000000,state_person_time,state_person_time,1,prevalence
2,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.2,covered,0.000000,state_person_time,state_person_time,1,prevalence
3,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.2,uncovered,1.000000,state_person_time,state_person_time,1,prevalence
...,...,...,...,...,...,...,...,...,...,...,...,...
7196,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.4,covered,0.895991,state_person_time,state_person_time,1,prevalence
7197,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.4,uncovered,0.104009,state_person_time,state_person_time,1,prevalence
7198,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.5,covered,0.895979,state_person_time,state_person_time,1,prevalence
7199,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.5,uncovered,0.104021,state_person_time,state_person_time,1,prevalence


In [18]:
# `get_prevalence` must reproduce the direct `vp.ratio` result exactly, apart
# from the extra 'measure' column. Assert it so that a mismatch stops the
# notebook instead of merely displaying False.
prevalence2_matches_ratio = sqlns_prev_by_year_age_wasting1.equals(
    sqlns_prev_by_year_age_wasting2.drop(columns='measure')
)
assert prevalence2_matches_ratio, "get_prevalence result differs from the vp.ratio result"
prevalence2_matches_ratio

True

# See what happens if we can't find appropriate person-time tables

We get a `StopIteration` error, which says nothing about which columns could not be found.

In [19]:
# This call is expected to fail: no person-time table has all of the requested
# columns. When this cell was first run the failure surfaced as StopIteration;
# the later cells in this notebook show a ValueError for the same call, so
# both are caught. Display the error instead of letting it stop the notebook.
try:
    csr.get_prevalence(
        results,
        state_variable='sq_lns',
        strata=['year', 'cause_state', 'wasting_state'],
        prefilter_query=prefilter_query,
    )
except (StopIteration, ValueError) as error:
    print(f"{type(error).__name__}: {error}")
else:
    print("No error was raised")

StopIteration: 

# Use try/except to raise more useful error messages

In [20]:
# This call is expected to fail: no person-time table has all of the requested
# numerator columns. Catch and display the error so the notebook still runs
# from top to bottom.
try:
    csr.get_prevalence(
        results,
        state_variable='sq_lns',
        strata=['year', 'cause_state', 'wasting_state'],
        prefilter_query=prefilter_query,
    )
except ValueError as error:
    print(f"{type(error).__name__}: {error}")
else:
    print("No error was raised")

ValueError: No person-time table found with numerator columns ['year', 'cause_state', 'wasting_state', 'sq_lns']

In [22]:
# This call is expected to fail: broadcasting the denominator over
# 'cause_state' asks for a column combination that no person-time table has.
# Catch and display the error so the notebook still runs from top to bottom.
try:
    csr.get_prevalence(
        results,
        state_variable='sq_lns',
        strata=['year', 'wasting_state'],
        prefilter_query=prefilter_query,
        denominator_broadcast='cause_state',
    )
except ValueError as error:
    print(f"{type(error).__name__}: {error}")
else:
    print("No error was raised")

ValueError: No person-time table found with denominator columns ['year', 'wasting_state', 'cause_state']

# Test `get_prevalence` after deleting `default=[]` from numerator columns

In [23]:
# Display the wasting-state person-time table again as a reminder of its columns.
results.wasting_state_person_time

Unnamed: 0,sex,year,wasting_state,measure,input_draw,scenario,x_factor_effect,value,x_factor,sq_lns,wasting_treatment,age
0,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat2,covered,covered,early_neonatal
1,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat1,covered,covered,early_neonatal
2,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat2,covered,uncovered,early_neonatal
3,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat1,covered,uncovered,early_neonatal
...,...,...,...,...,...,...,...,...,...,...,...,...
345596,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,16475.270363,cat2,uncovered,covered,2_to_4
345597,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,3419.826146,cat1,uncovered,covered,2_to_4
345598,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,1704.976044,cat2,uncovered,uncovered,2_to_4
345599,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,384.646133,cat1,uncovered,uncovered,2_to_4


In [25]:
# Prevalence of each 'x_factor' category within every
# (year, wasting_state, sq_lns) stratum, using the same prefilter as above.
# NOTE(review): NaN values presumably come from strata with zero person-time
# (0/0), e.g. 'covered' in 2022 -- confirm.
x_factor_by_year_wasting_state_sqlns = csr.get_prevalence(
    results,
    state_variable='x_factor',
    strata=['year', 'wasting_state', 'sq_lns'],
    prefilter_query=prefilter_query,
)
x_factor_by_year_wasting_state_sqlns

Unnamed: 0,year,wasting_state,sq_lns,input_draw,scenario,x_factor_effect,x_factor,value,numerator_measure,denominator_measure,multiplier,measure
0,2022,mild_child_wasting,covered,29,sqlns,1.1,cat1,,state_person_time,state_person_time,1,prevalence
1,2022,mild_child_wasting,covered,29,sqlns,1.1,cat2,,state_person_time,state_person_time,1,prevalence
2,2022,mild_child_wasting,covered,29,sqlns,1.2,cat1,,state_person_time,state_person_time,1,prevalence
3,2022,mild_child_wasting,covered,29,sqlns,1.2,cat2,,state_person_time,state_person_time,1,prevalence
...,...,...,...,...,...,...,...,...,...,...,...,...
4796,2026,susceptible_to_child_wasting,uncovered,946,sqlns,1.4,cat1,0.173366,state_person_time,state_person_time,1,prevalence
4797,2026,susceptible_to_child_wasting,uncovered,946,sqlns,1.4,cat2,0.826634,state_person_time,state_person_time,1,prevalence
4798,2026,susceptible_to_child_wasting,uncovered,946,sqlns,1.5,cat1,0.172345,state_person_time,state_person_time,1,prevalence
4799,2026,susceptible_to_child_wasting,uncovered,946,sqlns,1.5,cat2,0.827655,state_person_time,state_person_time,1,prevalence


In [26]:
# Collapse the 'x_factor' column; the category prevalences within each
# remaining stratum should then add up to 1.
vp.marginalize(x_factor_by_year_wasting_state_sqlns, 'x_factor')

Unnamed: 0,denominator_measure,input_draw,measure,multiplier,numerator_measure,scenario,sq_lns,wasting_state,x_factor_effect,year,value
0,state_person_time,29,prevalence,1,state_person_time,sqlns,covered,mild_child_wasting,1.1,2022,0.0
1,state_person_time,29,prevalence,1,state_person_time,sqlns,covered,mild_child_wasting,1.1,2023,1.0
2,state_person_time,29,prevalence,1,state_person_time,sqlns,covered,mild_child_wasting,1.1,2024,1.0
3,state_person_time,29,prevalence,1,state_person_time,sqlns,covered,mild_child_wasting,1.1,2025,1.0
...,...,...,...,...,...,...,...,...,...,...,...
2396,state_person_time,946,prevalence,1,state_person_time,sqlns,uncovered,susceptible_to_child_wasting,1.5,2023,1.0
2397,state_person_time,946,prevalence,1,state_person_time,sqlns,uncovered,susceptible_to_child_wasting,1.5,2024,1.0
2398,state_person_time,946,prevalence,1,state_person_time,sqlns,uncovered,susceptible_to_child_wasting,1.5,2025,1.0
2399,state_person_time,946,prevalence,1,state_person_time,sqlns,uncovered,susceptible_to_child_wasting,1.5,2026,1.0


In [27]:
# Make sure prevalences are adding up to 1 (or 0).
# The totals are floats, so `unique()` may list several values that all print
# as 1.; assert the invariant with a tolerance rather than relying on reading
# the displayed array.
x_factor_prevalence_totals = vp.marginalize(x_factor_by_year_wasting_state_sqlns, 'x_factor').value
assert (
    np.isclose(x_factor_prevalence_totals, 0) | np.isclose(x_factor_prevalence_totals, 1)
).all(), "x_factor prevalences do not add up to 0 or 1 in every stratum"
x_factor_prevalence_totals.unique()

array([0., 1., 1., 1., 1.])

# Test `get_relative_risk` function with 'wasting_state' as risk factor

Now it works since we look up the appropriate person-time table in `get_prevalence`.

In [28]:
# Relative risk of the 'sq_lns' prevalence for each wasting state compared to
# the reference category 'susceptible_to_child_wasting', by year and age.
# Reuse `prefilter_query` (the same filter string defined earlier) instead of
# spelling the filter out again, so the two cannot drift apart.
csr.get_relative_risk(
    data=results,
    measure='prevalence',
    outcome='sq_lns',
    strata=['year', 'age'],
    factor='wasting_state',
    reference_category='susceptible_to_child_wasting',
    prefilter_query=prefilter_query,
)

Unnamed: 0,year,age,sq_lns,input_draw,scenario,x_factor_effect,numerator_wasting_state,denominator_wasting_state,value,numerator_measure,denominator_measure,multiplier,measure
0,2022,12_to_23_months,covered,29,sqlns,1.1,mild_child_wasting,susceptible_to_child_wasting,,prevalence,prevalence,1,relative_risk
1,2022,12_to_23_months,covered,29,sqlns,1.1,moderate_acute_malnutrition,susceptible_to_child_wasting,,prevalence,prevalence,1,relative_risk
2,2022,12_to_23_months,covered,29,sqlns,1.1,severe_acute_malnutrition,susceptible_to_child_wasting,,prevalence,prevalence,1,relative_risk
3,2022,12_to_23_months,covered,29,sqlns,1.2,mild_child_wasting,susceptible_to_child_wasting,,prevalence,prevalence,1,relative_risk
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5396,2026,6-11_months,uncovered,946,sqlns,1.4,severe_acute_malnutrition,susceptible_to_child_wasting,0.982480,prevalence,prevalence,1,relative_risk
5397,2026,6-11_months,uncovered,946,sqlns,1.5,mild_child_wasting,susceptible_to_child_wasting,0.920247,prevalence,prevalence,1,relative_risk
5398,2026,6-11_months,uncovered,946,sqlns,1.5,moderate_acute_malnutrition,susceptible_to_child_wasting,1.065985,prevalence,prevalence,1,relative_risk
5399,2026,6-11_months,uncovered,946,sqlns,1.5,severe_acute_malnutrition,susceptible_to_child_wasting,0.955662,prevalence,prevalence,1,relative_risk


# Test code after refactoring try/except into new `get_person_time_table_name` function

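Looks like it's working: the results below match the earlier ones, and the error messages now also list the excluded tables.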

In [29]:
# Recompute the same prevalence after the refactoring, for comparison with the
# direct `vp.ratio` result computed earlier.
sqlns_prev_by_year_age_wasting3 = csr.get_prevalence(
    results,
    state_variable='sq_lns',
    strata=['year', 'age', 'wasting_state'],
    prefilter_query=prefilter_query
)
sqlns_prev_by_year_age_wasting3

Unnamed: 0,year,age,wasting_state,input_draw,scenario,x_factor_effect,sq_lns,value,numerator_measure,denominator_measure,multiplier,measure
0,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.1,covered,0.000000,state_person_time,state_person_time,1,prevalence
1,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.1,uncovered,1.000000,state_person_time,state_person_time,1,prevalence
2,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.2,covered,0.000000,state_person_time,state_person_time,1,prevalence
3,2022,12_to_23_months,mild_child_wasting,29,sqlns,1.2,uncovered,1.000000,state_person_time,state_person_time,1,prevalence
...,...,...,...,...,...,...,...,...,...,...,...,...
7196,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.4,covered,0.895991,state_person_time,state_person_time,1,prevalence
7197,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.4,uncovered,0.104009,state_person_time,state_person_time,1,prevalence
7198,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.5,covered,0.895979,state_person_time,state_person_time,1,prevalence
7199,2026,6-11_months,susceptible_to_child_wasting,946,sqlns,1.5,uncovered,0.104021,state_person_time,state_person_time,1,prevalence


In [30]:
# The refactored `get_prevalence` must still reproduce the direct `vp.ratio`
# result exactly, apart from the extra 'measure' column. Assert it so that a
# regression stops the notebook instead of merely displaying False.
prevalence3_matches_ratio = sqlns_prev_by_year_age_wasting1.equals(
    sqlns_prev_by_year_age_wasting3.drop(columns='measure')
)
assert prevalence3_matches_ratio, "refactored get_prevalence result differs from the vp.ratio result"
prevalence3_matches_ratio

True

In [31]:
# Re-run the relative-risk computation after the refactoring: 'sq_lns'
# prevalence for each wasting state relative to 'susceptible_to_child_wasting',
# by year and age.
# Reuse `prefilter_query` (the same filter string defined earlier) instead of
# spelling the filter out again, so the two cannot drift apart.
csr.get_relative_risk(
    data=results,
    measure='prevalence',
    outcome='sq_lns',
    strata=['year', 'age'],
    factor='wasting_state',
    reference_category='susceptible_to_child_wasting',
    prefilter_query=prefilter_query,
)

Unnamed: 0,year,age,sq_lns,input_draw,scenario,x_factor_effect,numerator_wasting_state,denominator_wasting_state,value,numerator_measure,denominator_measure,multiplier,measure
0,2022,12_to_23_months,covered,29,sqlns,1.1,mild_child_wasting,susceptible_to_child_wasting,,prevalence,prevalence,1,relative_risk
1,2022,12_to_23_months,covered,29,sqlns,1.1,moderate_acute_malnutrition,susceptible_to_child_wasting,,prevalence,prevalence,1,relative_risk
2,2022,12_to_23_months,covered,29,sqlns,1.1,severe_acute_malnutrition,susceptible_to_child_wasting,,prevalence,prevalence,1,relative_risk
3,2022,12_to_23_months,covered,29,sqlns,1.2,mild_child_wasting,susceptible_to_child_wasting,,prevalence,prevalence,1,relative_risk
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5396,2026,6-11_months,uncovered,946,sqlns,1.4,severe_acute_malnutrition,susceptible_to_child_wasting,0.982480,prevalence,prevalence,1,relative_risk
5397,2026,6-11_months,uncovered,946,sqlns,1.5,mild_child_wasting,susceptible_to_child_wasting,0.920247,prevalence,prevalence,1,relative_risk
5398,2026,6-11_months,uncovered,946,sqlns,1.5,moderate_acute_malnutrition,susceptible_to_child_wasting,1.065985,prevalence,prevalence,1,relative_risk
5399,2026,6-11_months,uncovered,946,sqlns,1.5,severe_acute_malnutrition,susceptible_to_child_wasting,0.955662,prevalence,prevalence,1,relative_risk


In [33]:
# This call is expected to fail: no person-time table has all of the requested
# columns. Catch and display the error so the notebook still runs from top to
# bottom.
try:
    csr.get_prevalence(
        results,
        state_variable='sq_lns',
        strata=['year', 'cause_state', 'wasting_state'],
        prefilter_query=prefilter_query,
    )
except ValueError as error:
    print(f"{type(error).__name__}: {error}")
else:
    print("No error was raised")

ValueError: No person-time table found with columns ['year', 'cause_state', 'wasting_state', 'sq_lns']. (Excluded tables: cause_state_person_time)

In [34]:
# This call is expected to fail: broadcasting the denominator over
# 'cause_state' asks for a column combination that no person-time table has.
# Catch and display the error so the notebook still runs from top to bottom.
try:
    csr.get_prevalence(
        results,
        state_variable='sq_lns',
        strata=['year', 'wasting_state'],
        prefilter_query=prefilter_query,
        denominator_broadcast='cause_state',
    )
except ValueError as error:
    print(f"{type(error).__name__}: {error}")
else:
    print("No error was raised")

ValueError: No person-time table found with columns ['year', 'wasting_state', 'cause_state']. (Excluded tables: cause_state_person_time)