In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 8)

import collections

import warnings
# warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from pathlib import Path

import db_queries as db
import vivarium_helpers.id_helper as idh

# Add the repo directory vivarium_research_ciff_sam/ to sys.path
# (resolved relative to the notebook's current working directory)
import os, sys
repo_path = os.path.abspath('../..')
# Guard against piling up duplicate entries when this cell is re-run in the same kernel
if repo_path not in sys.path:
    sys.path.append(repo_path)
# Assumes vivarium_research_ciff_sam/ is in sys.path
import model_validation.vivarium_transformed_output as vto
# import model_validation.vivarium_raw_output as vro
import model_validation.vivarium_output_processing as vo

!pwd
!whoami
!date

/ihme/homes/ndbs/vivarium_research_ciff_sam/nathaniel/scratch__gitignore__
ndbs
Tue Aug 24 17:43:51 PDT 2021


In [2]:
%load_ext autoreload
%autoreload 2

# Define and create directories

In [3]:
# Root results directory for the project (prefix of the count-data path built in the next cell)
project_results_dir = '/ihme/costeffectiveness/results/vivarium_ciff_sam'
# Model version; appears in both the count-data path and the output directory names
model_name = 'v2.5_stunting'
# Timestamp component of the count-data path (presumably identifies one model run -- confirm)
model_timestamp = '2021_08_05_16_17_12'
# Username used to build the home-directory output path
username = 'ndbs'

In [4]:
# Directory holding the transformed count data for the selected model run
model_count_data_dir = f'{project_results_dir}/{model_name}/ciff_sam/{model_timestamp}/count_data/'

project_vv_directory_name = 'ciff_malnutrition/verification_and_validation'

output_dir = f'/ihme/homes/{username}/vivarium_results/{project_vv_directory_name}/{model_name}'
# Built from `username` (rather than a hard-coded name) so both per-user paths change together
share_output_dir = f'/share/scratch/users/{username}/vivarium_results/{project_vv_directory_name}/{model_name}'
j_output_dir = f'/home/j/Project/simulation_science/{project_vv_directory_name}/{model_name}'

# Create the output directories if they don't exist.
# Caveats from the Path.mkdir() documentation:
#   - `mode` is combined with the process' umask value, so the resulting permissions
#     may be more restrictive than the requested 0o777 (the notebook's umask is unknown).
#   - With parents=True, any missing parent directories are created with the default
#     permissions, without taking `mode` into account.
for directory in [output_dir, share_output_dir, j_output_dir]:
    Path(directory).mkdir(mode=0o777, parents=True, exist_ok=True)

# Define a function to clean transformed data

In [5]:
def clean_transformed_data(data):
    """Reformat transformed count data to make more sense.

    The following tables are replaced with cleaned copies; every other table
    returned by ``data.to_dict()`` is passed through as-is:

    - ``wasting_state_person_time``: the 'cause' column is renamed to 'wasting_state'.
    - ``disease_state_person_time``: the 'cause' column is renamed to 'cause_state',
      and a new 'cause' column holds 'cause_state' with the literal text
      'susceptible_to_' removed.
    - ``wasting_transition_count`` and ``disease_transition_count``: a new 'transition'
      column holds the original 'measure' with the literal text '_event_count'
      removed, and 'measure' is overwritten with the constant 'transition_count'.

    Parameters
    ----------
    data : vto.VivariumTransformedOutput
        Output object exposing the four tables above as attributes and a
        ``to_dict()`` method.

    Returns
    -------
    vto.VivariumTransformedOutput
        A new output object built from the original tables with the four
        cleaned tables substituted.
    """
    # Rename mislabeled 'cause' column in `wasting_state_person_time`
    wasting_state_person_time = data.wasting_state_person_time.rename(columns={'cause': 'wasting_state'})

    # Rename poorly named 'cause' column in `disease_state_person_time` and add an actual cause column.
    # regex=False: the pattern is a literal substring, so don't rely on the pandas default for `regex`.
    disease_state_person_time = (
        data.disease_state_person_time
        .rename(columns={'cause': 'cause_state'})
        .assign(cause=lambda table: table['cause_state'].str.replace('susceptible_to_', '', regex=False))
    )

    def clean_transition_df(df):
        """Add a 'transition' column derived from 'measure', then set 'measure' to a constant."""
        return (
            df
            .assign(transition=lambda table: table['measure'].str.replace('_event_count', '', regex=False))
            .assign(measure='transition_count')
        )

    # Make the wasting and disease transition count dataframes better
    wasting_transition_count = clean_transition_df(data.wasting_transition_count)
    disease_transition_count = clean_transition_df(data.disease_transition_count)

    # Copy the mapping before updating it, so the input object's tables are left untouched
    # regardless of whether to_dict() hands back its internal dictionary or a copy.
    data_dict = dict(data.to_dict())
    data_dict.update(
        {'wasting_state_person_time': wasting_state_person_time,
         'disease_state_person_time': disease_state_person_time,
         'wasting_transition_count': wasting_transition_count,
         'disease_transition_count': disease_transition_count,
        }
    )
    return vto.VivariumTransformedOutput(data_dict)

# Load and clean transformed count data

In [7]:
# Load the transformed count data from disk; keep the untouched version in `orig_data`
orig_data = vto.VivariumTransformedOutput.from_directory(model_count_data_dir)
# Cleaned version (renamed/added columns) used by the rest of the notebook
data = clean_transformed_data(orig_data)
# List the available tables as a quick check of what was loaded
data.table_names()

['wasting_transition_count',
 'wasting_state_person_time',
 'deaths',
 'population',
 'ylls',
 'ylds',
 'disease_state_person_time',
 'disease_transition_count']

In [9]:
# Display the cleaned wasting person-time table; its former 'cause' column is now 'wasting_state'
data.wasting_state_person_time

Unnamed: 0,sex,year,wasting_state,measure,input_draw,scenario,value,age
0,female,2022,mild_child_wasting,state_person_time,29,baseline,414.090349,early_neonatal
1,female,2022,mild_child_wasting,state_person_time,29,baseline,1234.535250,late_neonatal
2,female,2022,mild_child_wasting,state_person_time,29,baseline,8926.954141,1-5_months
3,female,2022,mild_child_wasting,state_person_time,29,baseline,10434.387406,6-11_months
...,...,...,...,...,...,...,...,...
2876,male,2026,susceptible_to_child_wasting,state_person_time,946,baseline,30258.041068,1-5_months
2877,male,2026,susceptible_to_child_wasting,state_person_time,946,baseline,35695.241615,6-11_months
2878,male,2026,susceptible_to_child_wasting,state_person_time,946,baseline,70879.949350,12_to_23_months
2879,male,2026,susceptible_to_child_wasting,state_person_time,946,baseline,215444.187543,2_to_4


In [10]:
# Display the YLLs table (not one of the tables altered by clean_transformed_data)
data.ylls

Unnamed: 0,sex,year,cause,measure,input_draw,scenario,value,stunting_state,age
0,female,2022,diarrheal_diseases,ylls,29,baseline,2843.691511,cat4,early_neonatal
1,female,2022,diarrheal_diseases,ylls,29,baseline,0.000000,cat3,early_neonatal
2,female,2022,diarrheal_diseases,ylls,29,baseline,0.000000,cat2,early_neonatal
3,female,2022,diarrheal_diseases,ylls,29,baseline,0.000000,cat1,early_neonatal
...,...,...,...,...,...,...,...,...,...
17276,male,2026,severe_acute_malnutrition,ylls,946,baseline,258.045887,cat4,2_to_4
17277,male,2026,severe_acute_malnutrition,ylls,946,baseline,0.000000,cat3,2_to_4
17278,male,2026,severe_acute_malnutrition,ylls,946,baseline,85.628179,cat2,2_to_4
17279,male,2026,severe_acute_malnutrition,ylls,946,baseline,86.997977,cat1,2_to_4


# Define helper functions for summarizing across draws and totaling person-time

In [23]:
def _ensure_columns_not_levels(df, column_list=None):
    """Move Index levels into columns to enable passing index level names as well as column names."""
    if column_list is None: column_list = []
    if df.index.nlevels > 1 or df.index.name in column_list:
        df = df.reset_index()
    return df

def describe(data, **describe_kwargs):
    """Summarize the value column within each group of the remaining columns.

    Every column other than `vo.DRAW_COLUMN` and `vo.VALUE_COLUMN` is used as a
    grouping key, so each summary row describes the values across draws for one
    combination of the other columns. Index levels are first moved into columns
    when `data` has a MultiIndex or its index is named after the draw or value
    column.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing the draw and value columns plus any stratifying columns.
    **describe_kwargs
        Forwarded to the groupby `describe()` call. If 'percentiles' is not
        supplied, it defaults to [.025, .975].

    Returns
    -------
    The result of `describe()` on the grouped value column.
    """
    describe_kwargs.setdefault('percentiles', [.025, .975])
    excluded = [vo.DRAW_COLUMN, vo.VALUE_COLUMN]
    flat = _ensure_columns_not_levels(data, excluded)
    strata = flat.columns.difference(excluded).to_list()
    grouped_values = flat.groupby(strata)[vo.VALUE_COLUMN]
    return grouped_values.describe(**describe_kwargs)

# Example: summarize wasting-state person-time across draws for each combination of the other columns
describe(data.wasting_state_person_time)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,count,mean,std,min,2.5%,50%,97.5%,max
age,measure,scenario,sex,wasting_state,year,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1-5_months,state_person_time,baseline,female,mild_child_wasting,2022,12.0,8826.488935,88.753512,8700.517454,8709.954415,8816.203285,8979.521253,8995.934292
1-5_months,state_person_time,baseline,female,mild_child_wasting,2023,12.0,8794.313826,165.691577,8607.275838,8615.604894,8744.853525,9120.758350,9171.915127
1-5_months,state_person_time,baseline,female,mild_child_wasting,2024,12.0,8836.645562,137.494408,8702.147844,8706.552361,8760.017112,9083.276899,9116.984257
1-5_months,state_person_time,baseline,female,mild_child_wasting,2025,12.0,8808.098677,141.141967,8655.026694,8655.982888,8790.670089,9095.137029,9152.725530
...,...,...,...,...,...,...,...,...,...,...,...,...,...
late_neonatal,state_person_time,baseline,male,susceptible_to_child_wasting,2023,12.0,4174.045403,30.327060,4101.519507,4111.165024,4179.370979,4206.719644,4208.154689
late_neonatal,state_person_time,baseline,male,susceptible_to_child_wasting,2024,12.0,4199.450605,25.089339,4152.565366,4159.082170,4196.355921,4230.899281,4231.227926
late_neonatal,state_person_time,baseline,male,susceptible_to_child_wasting,2025,12.0,4182.792494,40.514449,4083.872690,4105.738672,4177.446270,4237.145277,4244.736482
late_neonatal,state_person_time,baseline,male,susceptible_to_child_wasting,2026,12.0,4172.952202,24.003145,4122.913073,4127.741478,4179.908966,4205.233231,4207.151266


In [26]:
def get_all_ages_person_time(person_time):
    """Return person-time marginalized over 'age', relabeled with age='all'.

    The 'age' column is marginalized out with `vo.marginalize`, re-added with
    the constant value 'all', and the columns are put back in the same order as
    in the input `person_time` table.
    """
    column_order = person_time.columns
    without_age = vo.marginalize(person_time, 'age')
    relabeled = without_age.assign(age='all')
    return relabeled[column_order]

def get_total_person_time(data, include_all_ages=False):
    """Return total person-time with the wasting state marginalized out.

    Parameters
    ----------
    data
        Object with a `wasting_state_person_time` table attribute.
    include_all_ages : bool, default False
        If true, rows with age='all' (from `get_all_ages_person_time`) are
        appended below the age-specific rows and the index is renumbered.

    Returns
    -------
    pandas.DataFrame
        Person-time table whose 'measure' column is set to 'person_time'.
    """
    person_time = (
        vo.marginalize(data.wasting_state_person_time, 'wasting_state')
        .assign(measure='person_time')
    )
    if include_all_ages:
        # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
        # and removed in pandas 2.0; the resulting table is the same.
        person_time = pd.concat(
            [person_time, get_all_ages_person_time(person_time)],
            ignore_index=True,
        )
    return person_time

# def get_total_person_time_with_all_ages(data):
#     person_time = get_total_person_time(data)
#     return person_time.append(get_all_ages_person_time(person_time))

# Person-time with wasting state marginalized out, age-specific rows only
person_time = get_total_person_time(data)
person_time

Unnamed: 0,age,input_draw,measure,scenario,sex,year,value
0,1-5_months,29,person_time,baseline,female,2022,44140.585900
1,1-5_months,29,person_time,baseline,female,2023,43843.549624
2,1-5_months,29,person_time,baseline,female,2024,43946.220397
3,1-5_months,29,person_time,baseline,female,2025,43707.561944
...,...,...,...,...,...,...,...
716,late_neonatal,946,person_time,baseline,male,2023,6263.611225
717,late_neonatal,946,person_time,baseline,male,2024,6284.217659
718,late_neonatal,946,person_time,baseline,male,2025,6291.129363
719,late_neonatal,946,person_time,baseline,male,2026,6238.913073


In [27]:
# Same table with age == 'all' rows appended after the age-specific rows
person_time_all = get_total_person_time(data, True)
person_time_all

Unnamed: 0,age,input_draw,measure,scenario,sex,year,value
0,1-5_months,29,person_time,baseline,female,2022,44140.585900
1,1-5_months,29,person_time,baseline,female,2023,43843.549624
2,1-5_months,29,person_time,baseline,female,2024,43946.220397
3,1-5_months,29,person_time,baseline,female,2025,43707.561944
...,...,...,...,...,...,...,...
836,all,946,person_time,baseline,male,2023,521780.900753
837,all,946,person_time,baseline,male,2024,529583.511294
838,all,946,person_time,baseline,male,2025,532527.890486
839,all,946,person_time,baseline,male,2026,532910.529774


# Compute death rate by wasting category

In [24]:
# Display the deaths table, which is the numerator of the ratios computed below
data.deaths

Unnamed: 0,sex,year,cause,measure,input_draw,scenario,value,stunting_state,age
0,female,2022,diarrheal_diseases,death,29,baseline,32.0,cat4,early_neonatal
1,female,2022,diarrheal_diseases,death,29,baseline,0.0,cat3,early_neonatal
2,female,2022,diarrheal_diseases,death,29,baseline,0.0,cat2,early_neonatal
3,female,2022,diarrheal_diseases,death,29,baseline,0.0,cat1,early_neonatal
...,...,...,...,...,...,...,...,...,...
17276,male,2026,severe_acute_malnutrition,death,946,baseline,3.0,cat4,2_to_4
17277,male,2026,severe_acute_malnutrition,death,946,baseline,0.0,cat3,2_to_4
17278,male,2026,severe_acute_malnutrition,death,946,baseline,1.0,cat2,2_to_4
17279,male,2026,severe_acute_malnutrition,death,946,baseline,1.0,cat1,2_to_4


In [30]:
# Ratio of deaths to person-time, stratified by year, sex, and age
# (the output also keeps input_draw and scenario as columns)
vo.ratio(
    data.deaths,
    person_time,
    strata=['year', 'sex', 'age'],
)

Unnamed: 0,year,sex,age,input_draw,scenario,value,numerator_measure,denominator_measure,multiplier
0,2022,female,1-5_months,29,baseline,0.011033,death,person_time,1
1,2022,female,1-5_months,223,baseline,0.012241,death,person_time,1
2,2022,female,1-5_months,232,baseline,0.013871,death,person_time,1
3,2022,female,1-5_months,357,baseline,0.013104,death,person_time,1
...,...,...,...,...,...,...,...,...,...
716,2026,male,late_neonatal,650,baseline,0.125277,death,person_time,1
717,2026,male,late_neonatal,680,baseline,0.086785,death,person_time,1
718,2026,male,late_neonatal,829,baseline,0.073546,death,person_time,1
719,2026,male,late_neonatal,946,baseline,0.109474,death,person_time,1


In [33]:
# Same ratio, but with the denominator that includes age == 'all' rows.
# NOTE(review): the age == 'all' rows of this result come out NaN (see the query
# in the next cell) -- presumably because `data.deaths` has no age == 'all' rows
# to match the all-ages person-time. Confirm, and add an all-ages numerator if an
# all-ages death rate is wanted.
df = vo.ratio(
    data.deaths,
    person_time_all,
    strata=['year', 'sex', 'age'],
)
df

Unnamed: 0,year,sex,age,input_draw,scenario,value,numerator_measure,denominator_measure,multiplier
0,2022,female,1-5_months,29,baseline,0.011033,death,person_time,1
1,2022,female,1-5_months,223,baseline,0.012241,death,person_time,1
2,2022,female,1-5_months,232,baseline,0.013871,death,person_time,1
3,2022,female,1-5_months,357,baseline,0.013104,death,person_time,1
...,...,...,...,...,...,...,...,...,...
836,2026,male,late_neonatal,650,baseline,0.125277,death,person_time,1
837,2026,male,late_neonatal,680,baseline,0.086785,death,person_time,1
838,2026,male,late_neonatal,829,baseline,0.073546,death,person_time,1
839,2026,male,late_neonatal,946,baseline,0.109474,death,person_time,1


In [34]:
# Inspect the all-ages rows of the ratio; every displayed value is NaN
df.query("age=='all'")

Unnamed: 0,year,sex,age,input_draw,scenario,value,numerator_measure,denominator_measure,multiplier
48,2022,female,all,29,baseline,,death,person_time,1
49,2022,female,all,223,baseline,,death,person_time,1
50,2022,female,all,232,baseline,,death,person_time,1
51,2022,female,all,357,baseline,,death,person_time,1
...,...,...,...,...,...,...,...,...,...
812,2026,male,all,650,baseline,,death,person_time,1
813,2026,male,all,680,baseline,,death,person_time,1
814,2026,male,all,829,baseline,,death,person_time,1
815,2026,male,all,946,baseline,,death,person_time,1


In [35]:
# Summarize the ratio across draws for each combination of the other columns
describe(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,count,mean,std,min,2.5%,50%,97.5%,max
age,denominator_measure,multiplier,numerator_measure,scenario,sex,year,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1-5_months,person_time,1,death,baseline,female,2022,12.0,0.011934,0.001498,0.008748,0.009025,0.012231,0.013704,0.013871
1-5_months,person_time,1,death,baseline,female,2023,12.0,0.011829,0.001647,0.007448,0.008403,0.012094,0.013398,0.013408
1-5_months,person_time,1,death,baseline,female,2024,12.0,0.011811,0.001524,0.008608,0.008873,0.012301,0.013427,0.013486
1-5_months,person_time,1,death,baseline,female,2025,12.0,0.011896,0.001319,0.009072,0.009597,0.011744,0.013699,0.013883
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
late_neonatal,person_time,1,death,baseline,male,2023,12.0,0.113694,0.016034,0.081422,0.085886,0.115384,0.132722,0.132908
late_neonatal,person_time,1,death,baseline,male,2024,12.0,0.109721,0.017226,0.073080,0.078113,0.112671,0.132363,0.133427
late_neonatal,person_time,1,death,baseline,male,2025,12.0,0.110308,0.017157,0.071757,0.076400,0.111413,0.133971,0.136327
late_neonatal,person_time,1,death,baseline,male,2026,12.0,0.111706,0.017072,0.073546,0.077187,0.119364,0.126029,0.126239


In [36]:
# Number of distinct age values in the ratio table (includes the 'all' label)
df.age.nunique()

7

In [37]:
# Number of distinct years in the ratio table
df.year.nunique()

5