In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 8)

import collections

import warnings
# warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from pathlib import Path

import db_queries as db
import vivarium_helpers.id_helper as idh

# Add the repo directory vivarium_research_ciff_sam/ to sys.path
import os, sys
repo_path = os.path.abspath('../..')
# Only append when missing, so that re-running this cell does not keep
# adding duplicate entries to sys.path.
if repo_path not in sys.path:
    sys.path.append(repo_path)
# Assumes vivarium_research_ciff_sam/ is in sys.path
import model_validation.vivarium_transformed_output as vto
# import model_validation.vivarium_raw_output as vro
import model_validation.vivarium_output_processing as vo

!pwd
!whoami
!date

/ihme/homes/ndbs/vivarium_research_ciff_sam/nathaniel/scratch__gitignore__
ndbs
Fri Aug 20 14:53:40 PDT 2021


In [2]:
# Reload imported modules automatically before each cell is executed, so that
# edits to the project modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Root directory under which this project's model results are stored
project_results_dir = '/ihme/costeffectiveness/results/vivarium_ciff_sam'
# Model name and run timestamp; both are used as path components when
# locating the count data for the run
model_name = 'v2.5_stunting'
model_timestamp = '2021_08_05_16_17_12'
# User name used when building the per-user output directory path
username = 'ndbs'

In [4]:
# Directory holding the transformed count data for the selected model run
model_count_data_dir = f'{project_results_dir}/{model_name}/ciff_sam/{model_timestamp}/count_data/'

# Relative path shared by all of the output locations below
project_vv_directory_name = 'ciff_malnutrition/verification_and_validation'

# Output locations for this model's verification and validation results.
# Both per-user locations are built from `username` so that changing the
# user in the configuration cell updates them consistently.
output_dir = f'/ihme/homes/{username}/vivarium_results/{project_vv_directory_name}/{model_name}'
share_output_dir = f'/share/scratch/users/{username}/vivarium_results/{project_vv_directory_name}/{model_name}'
j_output_dir = f'/home/j/Project/simulation_science/{project_vv_directory_name}/{model_name}'

# Create the output directories if they don't exist
# Note from Path.mkdir() documentation:
#   "If mode is given, it is combined with the process’ umask value to determine the file mode and access flags."
#
# I don't know what this notebook process' umask value will be, so I don't know if this will actually result
# in the correct (most permissive) permissions for the directories...
# for directory in [output_dir, share_output_dir, j_output_dir]:
#     Path(directory).mkdir(mode=0o777, parents=True, exist_ok=True)

# Load transformed count data

In [12]:
# Load the transformed count data from the model's count_data directory
orig_data = vto.VivariumTransformedOutput.from_directory(model_count_data_dir)
# List the names of the tables that were loaded
orig_data.table_names()

['wasting_transition_count',
 'wasting_state_person_time',
 'deaths',
 'population',
 'ylls',
 'ylds',
 'disease_state_person_time',
 'disease_transition_count']

In [13]:
# Inspect the raw wasting person-time table; note that the wasting state is
# stored in a column named 'cause'
orig_data.wasting_state_person_time

Unnamed: 0,sex,year,cause,measure,input_draw,scenario,value,age
0,female,2022,mild_child_wasting,state_person_time,29,baseline,414.090349,early_neonatal
1,female,2022,mild_child_wasting,state_person_time,29,baseline,1234.535250,late_neonatal
2,female,2022,mild_child_wasting,state_person_time,29,baseline,8926.954141,1-5_months
3,female,2022,mild_child_wasting,state_person_time,29,baseline,10434.387406,6-11_months
...,...,...,...,...,...,...,...,...
2876,male,2026,susceptible_to_child_wasting,state_person_time,946,baseline,30258.041068,1-5_months
2877,male,2026,susceptible_to_child_wasting,state_person_time,946,baseline,35695.241615,6-11_months
2878,male,2026,susceptible_to_child_wasting,state_person_time,946,baseline,70879.949350,12_to_23_months
2879,male,2026,susceptible_to_child_wasting,state_person_time,946,baseline,215444.187543,2_to_4


In [14]:
# Inspect the raw disease person-time table, which also has a 'stunting_state' column
orig_data.disease_state_person_time

Unnamed: 0,sex,year,cause,measure,input_draw,scenario,value,stunting_state,age
0,female,2022,diarrheal_diseases,state_person_time,29,baseline,43.489391,cat4,early_neonatal
1,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat3,early_neonatal
2,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat2,early_neonatal
3,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat1,early_neonatal
...,...,...,...,...,...,...,...,...,...
17276,male,2026,susceptible_to_measles,state_person_time,946,baseline,121118.023272,cat4,2_to_4
17277,male,2026,susceptible_to_measles,state_person_time,946,baseline,67325.987680,cat3,2_to_4
17278,male,2026,susceptible_to_measles,state_person_time,946,baseline,66608.265572,cat2,2_to_4
17279,male,2026,susceptible_to_measles,state_person_time,946,baseline,63010.346338,cat1,2_to_4


In [15]:
# List the distinct values of 'cause': the column holds cause *states*
# (each disease and its 'susceptible_to_' counterpart), not just cause names
orig_data.disease_state_person_time.cause.unique()

array(['diarrheal_diseases', 'lower_respiratory_infections', 'measles',
       'susceptible_to_diarrheal_diseases',
       'susceptible_to_lower_respiratory_infections',
       'susceptible_to_measles'], dtype=object)

In [37]:
# Inspect the raw disease transition counts; the transition name is embedded
# in the 'measure' column with an '_event_count' suffix
orig_data.disease_transition_count

Unnamed: 0,sex,year,measure,input_draw,scenario,value,stunting_state,age
0,female,2022,diarrheal_diseases_to_susceptible_to_diarrheal...,29,baseline,2475.0,cat4,early_neonatal
1,female,2022,diarrheal_diseases_to_susceptible_to_diarrheal...,29,baseline,0.0,cat3,early_neonatal
2,female,2022,diarrheal_diseases_to_susceptible_to_diarrheal...,29,baseline,0.0,cat2,early_neonatal
3,female,2022,diarrheal_diseases_to_susceptible_to_diarrheal...,29,baseline,0.0,cat1,early_neonatal
...,...,...,...,...,...,...,...,...
17276,male,2026,susceptible_to_measles_to_measles_event_count,946,baseline,447.0,cat4,2_to_4
17277,male,2026,susceptible_to_measles_to_measles_event_count,946,baseline,243.0,cat3,2_to_4
17278,male,2026,susceptible_to_measles_to_measles_event_count,946,baseline,368.0,cat2,2_to_4
17279,male,2026,susceptible_to_measles_to_measles_event_count,946,baseline,642.0,cat1,2_to_4


In [30]:
def clean_transformed_data(data):
    """Build a new output object whose tables have clearer column names.

    Four tables are reformatted; every other table is carried over exactly
    as returned by ``data.to_dict()``:

    * ``wasting_state_person_time``: the mislabeled 'cause' column is renamed
      to 'wasting_state'.
    * ``disease_state_person_time``: 'cause' is renamed to 'cause_state', and
      a new 'cause' column is added containing 'cause_state' with the text
      'susceptible_to_' removed.
    * ``wasting_transition_count`` and ``disease_transition_count``: a new
      'transition' column is added containing 'measure' with the text
      '_event_count' removed, and 'measure' is overwritten with the constant
      'transition_count'.

    Parameters
    ----------
    data
        Object exposing the four tables above as DataFrame attributes and
        providing a ``to_dict()`` method (in this notebook, a
        ``vto.VivariumTransformedOutput``).

    Returns
    -------
    vto.VivariumTransformedOutput
        A new object constructed from the updated dictionary of tables.

    Notes
    -----
    The reformatted DataFrames are new objects (``rename``/``assign`` do not
    modify their input).
    NOTE(review): whether ``data.to_dict()`` returns a fresh dictionary is not
    visible from here -- confirm that updating it cannot alter ``data``.
    """
    # Rename mislabeled 'cause' column in `wasting_state_person_time`
    wasting_state_person_time = data.wasting_state_person_time.rename(
        columns={'cause': 'wasting_state'}
    )
    # Rename poorly named 'cause' column in `disease_state_person_time` and add an actual cause column.
    # `regex=False` requests literal substring replacement explicitly, so the result
    # does not depend on the pandas version's default for `regex`.
    disease_state_person_time = (
        data.disease_state_person_time
        .rename(columns={'cause': 'cause_state'})
        .assign(
            cause=lambda df: df['cause_state'].str.replace(
                'susceptible_to_', '', regex=False
            )
        )
    )

    def clean_transition_df(transition_df):
        """Move the transition name into 'transition' and normalize 'measure'."""
        return (
            transition_df
            # Must run before 'measure' is overwritten below
            .assign(
                transition=lambda df: df['measure'].str.replace(
                    '_event_count', '', regex=False
                )
            )
            .assign(measure='transition_count')
        )

    # Make the wasting and disease transition count dataframes better
    wasting_transition_count = clean_transition_df(data.wasting_transition_count)
    disease_transition_count = clean_transition_df(data.disease_transition_count)

    # Start from all original tables, then substitute the cleaned ones
    data_dict = data.to_dict()
    data_dict.update(
        {'wasting_state_person_time': wasting_state_person_time,
         'disease_state_person_time': disease_state_person_time,
         'wasting_transition_count': wasting_transition_count,
         'disease_transition_count': disease_transition_count,
        }
    )
    clean_data = vto.VivariumTransformedOutput(data_dict)
    return clean_data

In [31]:
# Clean the loaded data; `orig_data` is kept so the two versions can be compared
data = clean_transformed_data(orig_data)
# The cleaned object should expose the same table names as the original
data.table_names()

['wasting_transition_count',
 'wasting_state_person_time',
 'deaths',
 'population',
 'ylls',
 'ylds',
 'disease_state_person_time',
 'disease_transition_count']

In [32]:
# Cleaned disease person-time: the old 'cause' column is now 'cause_state',
# and a new 'cause' column holds the cause name
data.disease_state_person_time

Unnamed: 0,sex,year,cause_state,measure,input_draw,scenario,value,stunting_state,age,cause
0,female,2022,diarrheal_diseases,state_person_time,29,baseline,43.489391,cat4,early_neonatal,diarrheal_diseases
1,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat3,early_neonatal,diarrheal_diseases
2,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat2,early_neonatal,diarrheal_diseases
3,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat1,early_neonatal,diarrheal_diseases
...,...,...,...,...,...,...,...,...,...,...
17276,male,2026,susceptible_to_measles,state_person_time,946,baseline,121118.023272,cat4,2_to_4,measles
17277,male,2026,susceptible_to_measles,state_person_time,946,baseline,67325.987680,cat3,2_to_4,measles
17278,male,2026,susceptible_to_measles,state_person_time,946,baseline,66608.265572,cat2,2_to_4,measles
17279,male,2026,susceptible_to_measles,state_person_time,946,baseline,63010.346338,cat1,2_to_4,measles


In [33]:
# Original table for comparison: it should still have its original 'cause' column
orig_data.disease_state_person_time

Unnamed: 0,sex,year,cause,measure,input_draw,scenario,value,stunting_state,age
0,female,2022,diarrheal_diseases,state_person_time,29,baseline,43.489391,cat4,early_neonatal
1,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat3,early_neonatal
2,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat2,early_neonatal
3,female,2022,diarrheal_diseases,state_person_time,29,baseline,0.000000,cat1,early_neonatal
...,...,...,...,...,...,...,...,...,...
17276,male,2026,susceptible_to_measles,state_person_time,946,baseline,121118.023272,cat4,2_to_4
17277,male,2026,susceptible_to_measles,state_person_time,946,baseline,67325.987680,cat3,2_to_4
17278,male,2026,susceptible_to_measles,state_person_time,946,baseline,66608.265572,cat2,2_to_4
17279,male,2026,susceptible_to_measles,state_person_time,946,baseline,63010.346338,cat1,2_to_4


In [34]:
# Cleaned disease transition counts: transition names are now in the 'transition' column
data.disease_transition_count

Unnamed: 0,sex,year,measure,input_draw,scenario,value,stunting_state,age,transition
0,female,2022,transition_count,29,baseline,2475.0,cat4,early_neonatal,diarrheal_diseases_to_susceptible_to_diarrheal...
1,female,2022,transition_count,29,baseline,0.0,cat3,early_neonatal,diarrheal_diseases_to_susceptible_to_diarrheal...
2,female,2022,transition_count,29,baseline,0.0,cat2,early_neonatal,diarrheal_diseases_to_susceptible_to_diarrheal...
3,female,2022,transition_count,29,baseline,0.0,cat1,early_neonatal,diarrheal_diseases_to_susceptible_to_diarrheal...
...,...,...,...,...,...,...,...,...,...
17276,male,2026,transition_count,946,baseline,447.0,cat4,2_to_4,susceptible_to_measles_to_measles
17277,male,2026,transition_count,946,baseline,243.0,cat3,2_to_4,susceptible_to_measles_to_measles
17278,male,2026,transition_count,946,baseline,368.0,cat2,2_to_4,susceptible_to_measles_to_measles
17279,male,2026,transition_count,946,baseline,642.0,cat1,2_to_4,susceptible_to_measles_to_measles


In [35]:
# After cleaning, 'measure' should contain only the value 'transition_count'
data.disease_transition_count.measure.unique()

array(['transition_count'], dtype=object)

In [36]:
# Cleaned wasting transition counts, with the same 'measure'/'transition' layout
data.wasting_transition_count

Unnamed: 0,sex,year,measure,input_draw,scenario,value,age,transition
0,female,2022,transition_count,29,baseline,1245.0,early_neonatal,mild_child_wasting_to_moderate_acute_malnutrition
1,female,2022,transition_count,29,baseline,3803.0,late_neonatal,mild_child_wasting_to_moderate_acute_malnutrition
2,female,2022,transition_count,29,baseline,29178.0,1-5_months,mild_child_wasting_to_moderate_acute_malnutrition
3,female,2022,transition_count,29,baseline,34038.0,6-11_months,mild_child_wasting_to_moderate_acute_malnutrition
...,...,...,...,...,...,...,...,...
5036,male,2026,transition_count,946,baseline,3330.0,1-5_months,susceptible_to_child_wasting_to_mild_child_was...
5037,male,2026,transition_count,946,baseline,4391.0,6-11_months,susceptible_to_child_wasting_to_mild_child_was...
5038,male,2026,transition_count,946,baseline,8691.0,12_to_23_months,susceptible_to_child_wasting_to_mild_child_was...
5039,male,2026,transition_count,946,baseline,23097.0,2_to_4,susceptible_to_child_wasting_to_mild_child_was...


In [27]:
# Scratch check: a lazily evaluated iterable of four results can be unpacked
# directly into four names
x, y, z, w = (base ** 2 for base in [1, 2, 3, 5])

In [28]:
# First unpacked value (1 squared)
x

1

In [29]:
# Last unpacked value (5 squared)
w

25

# Test ratio function

In [40]:
# Call function before making any edits
orig_ratio = vo.ratio(
    data.wasting_transition_count,
    data.wasting_state_person_time,
    strata=['year', 'sex', 'age'],
    numerator_broadcast='transition',
    denominator_broadcast='wasting_state',
)
orig_ratio

Unnamed: 0,year,sex,age,input_draw,scenario,transition,wasting_state,value,numerator_measure,denominator_measure,multiplier
0,2022,female,1-5_months,29,baseline,mild_child_wasting_to_moderate_acute_malnutrition,mild_child_wasting,3.268528,transition_count,state_person_time,1
1,2022,female,1-5_months,29,baseline,mild_child_wasting_to_moderate_acute_malnutrition,moderate_acute_malnutrition,8.254240,transition_count,state_person_time,1
2,2022,female,1-5_months,29,baseline,mild_child_wasting_to_moderate_acute_malnutrition,severe_acute_malnutrition,31.588639,transition_count,state_person_time,1
3,2022,female,1-5_months,29,baseline,mild_child_wasting_to_moderate_acute_malnutrition,susceptible_to_child_wasting,0.948723,transition_count,state_person_time,1
...,...,...,...,...,...,...,...,...,...,...,...
20156,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting_to_mild_child_was...,mild_child_wasting,0.306930,transition_count,state_person_time,1
20157,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting_to_mild_child_was...,moderate_acute_malnutrition,0.673521,transition_count,state_person_time,1
20158,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting_to_mild_child_was...,severe_acute_malnutrition,1.797606,transition_count,state_person_time,1
20159,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting_to_mild_child_was...,susceptible_to_child_wasting,0.095641,transition_count,state_person_time,1


In [59]:
# Edit vo module and make sure I get the same thing
tcount_by_wstate_pt = vo.ratio(
    data.wasting_transition_count,
    data.wasting_state_person_time,
    strata=['year', 'sex', 'age'],
    numerator_broadcast='transition',
    denominator_broadcast='wasting_state',
)
print(tcount_by_wstate_pt.equals(orig_ratio))
tcount_by_wstate_pt

True


Unnamed: 0,year,sex,age,input_draw,scenario,transition,wasting_state,value,numerator_measure,denominator_measure,multiplier
0,2022,female,1-5_months,29,baseline,mild_child_wasting_to_moderate_acute_malnutrition,mild_child_wasting,3.268528,transition_count,state_person_time,1
1,2022,female,1-5_months,29,baseline,mild_child_wasting_to_moderate_acute_malnutrition,moderate_acute_malnutrition,8.254240,transition_count,state_person_time,1
2,2022,female,1-5_months,29,baseline,mild_child_wasting_to_moderate_acute_malnutrition,severe_acute_malnutrition,31.588639,transition_count,state_person_time,1
3,2022,female,1-5_months,29,baseline,mild_child_wasting_to_moderate_acute_malnutrition,susceptible_to_child_wasting,0.948723,transition_count,state_person_time,1
...,...,...,...,...,...,...,...,...,...,...,...
20156,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting_to_mild_child_was...,mild_child_wasting,0.306930,transition_count,state_person_time,1
20157,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting_to_mild_child_was...,moderate_acute_malnutrition,0.673521,transition_count,state_person_time,1
20158,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting_to_mild_child_was...,severe_acute_malnutrition,1.797606,transition_count,state_person_time,1
20159,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting_to_mild_child_was...,susceptible_to_child_wasting,0.095641,transition_count,state_person_time,1


# Test some additional functionality

In [51]:
# Demonstrate the error raised when the same column ('wasting_state') appears in
# both `numerator_broadcast` and `denominator_broadcast`.
# The expected ValueError is caught and displayed, so the demonstration does not
# stop the notebook under "Restart Kernel -> Run All".
try:
    vo.ratio(
        data.wasting_transition_count,
        data.wasting_state_person_time,
        strata=['year', 'sex', 'age'],
        numerator_broadcast=['transition', 'wasting_state'],
        denominator_broadcast='wasting_state',
    )
except ValueError as error:
    print(f'{type(error).__name__}: {error}')

ValueError: `numerator_broadcast` and `denominator_broadcast` must be disjoint lists of column names. Any column to include in both the numerator and denominator should go in `strata`.

In [53]:
# Numerator and denominator come from different tables: disease transition counts
# (broadcast over 'transition' and 'stunting_state') over wasting-state person-time
# (broadcast over 'wasting_state'), within shared year/sex/age strata.
# NOTE(review): the list printed above the result table is not produced by any code
# in this cell; presumably a leftover debug print inside `vo.ratio` -- confirm.
vo.ratio(
    data.disease_transition_count,
    data.wasting_state_person_time,
    strata=['year', 'sex', 'age'],
    numerator_broadcast=['transition', 'stunting_state'],
    denominator_broadcast='wasting_state',
)

['year', 'sex', 'age', 'transition', 'stunting_state', 'input_draw', 'scenario']


Unnamed: 0,year,sex,age,input_draw,scenario,transition,stunting_state,wasting_state,value,numerator_measure,denominator_measure,multiplier
0,2022,female,1-5_months,29,baseline,diarrheal_diseases_to_susceptible_to_diarrheal...,cat1,mild_child_wasting,1.272999,transition_count,state_person_time,1
1,2022,female,1-5_months,29,baseline,diarrheal_diseases_to_susceptible_to_diarrheal...,cat1,moderate_acute_malnutrition,3.214792,transition_count,state_person_time,1
2,2022,female,1-5_months,29,baseline,diarrheal_diseases_to_susceptible_to_diarrheal...,cat1,severe_acute_malnutrition,12.302875,transition_count,state_person_time,1
3,2022,female,1-5_months,29,baseline,diarrheal_diseases_to_susceptible_to_diarrheal...,cat1,susceptible_to_child_wasting,0.369500,transition_count,state_person_time,1
...,...,...,...,...,...,...,...,...,...,...,...,...
69116,2026,male,late_neonatal,946,baseline,susceptible_to_measles_to_measles,cat4,mild_child_wasting,0.000000,transition_count,state_person_time,1
69117,2026,male,late_neonatal,946,baseline,susceptible_to_measles_to_measles,cat4,moderate_acute_malnutrition,0.000000,transition_count,state_person_time,1
69118,2026,male,late_neonatal,946,baseline,susceptible_to_measles_to_measles,cat4,severe_acute_malnutrition,0.000000,transition_count,state_person_time,1
69119,2026,male,late_neonatal,946,baseline,susceptible_to_measles_to_measles,cat4,susceptible_to_child_wasting,0.000000,transition_count,state_person_time,1


In [58]:
# Same tables as the previous call with numerator and denominator swapped.
# Some rows display `inf`; presumably the transition count in the denominator is
# zero for those rows (the unswapped ratio shows 0.000000 there) -- confirm.
vo.ratio(
    data.wasting_state_person_time,
    data.disease_transition_count,
    strata=['year', 'sex', 'age'],
    numerator_broadcast='wasting_state',
    denominator_broadcast=['transition', 'stunting_state'],
)

Unnamed: 0,year,sex,age,input_draw,scenario,wasting_state,transition,stunting_state,value,numerator_measure,denominator_measure,multiplier
0,2022,female,1-5_months,29,baseline,mild_child_wasting,diarrheal_diseases_to_susceptible_to_diarrheal...,cat1,0.785547,state_person_time,transition_count,1
1,2022,female,1-5_months,29,baseline,mild_child_wasting,diarrheal_diseases_to_susceptible_to_diarrheal...,cat2,0.562292,state_person_time,transition_count,1
2,2022,female,1-5_months,29,baseline,mild_child_wasting,diarrheal_diseases_to_susceptible_to_diarrheal...,cat3,0.359943,state_person_time,transition_count,1
3,2022,female,1-5_months,29,baseline,mild_child_wasting,diarrheal_diseases_to_susceptible_to_diarrheal...,cat4,0.153000,state_person_time,transition_count,1
...,...,...,...,...,...,...,...,...,...,...,...,...
69116,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting,susceptible_to_measles_to_measles,cat1,inf,state_person_time,transition_count,1
69117,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting,susceptible_to_measles_to_measles,cat2,inf,state_person_time,transition_count,1
69118,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting,susceptible_to_measles_to_measles,cat3,inf,state_person_time,transition_count,1
69119,2026,male,late_neonatal,946,baseline,susceptible_to_child_wasting,susceptible_to_measles_to_measles,cat4,inf,state_person_time,transition_count,1


In [57]:
# Sanity check: 0.785547 is the first value of the swapped ratio; its reciprocal
# should match the first value of the unswapped ratio (1.272999)
1/0.785547

1.2729983056392553