In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 8)

from scipy import stats
import collections

import warnings
# warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from pathlib import Path
import yaml
import re

import db_queries as db
import vivarium_helpers.id_helper as idh
import gbd_mapping
from vivarium import Artifact

# Add the repo directory vivarium_research_ciff_sam/ to sys.path
import os, sys
repo_path = os.path.abspath('../..')
sys.path.append(repo_path)
# Assumes vivarium_research_ciff_sam/ is in sys.path
# import model_validation.vivarium_transformed_output as vto
# import model_validation.vivarium_raw_output as vro
import model_validation.vivarium_output_processing as vp
import model_validation.ciff_sam_results as csr
import model_validation.ciff_sam_plots as csp

%load_ext autoreload
%autoreload 2

!pwd
!whoami
!date

/ihme/homes/ndbs/vivarium_research_ciff_sam/model_validation/model5
ndbs
Thu Jan  6 17:42:35 PST 2022


# Get list of input draws used in simulation

In [2]:
results_dir = (
    '/share/costeffectiveness/results/vivarium_ciff_sam/'
    'v5.1.2_lbwsg_with_observer/ciff_sam/2021_12_29_17_30_01'
)

In [3]:
with open(f"{results_dir}/keyspace.yaml") as keyspace_file:
    keyspace = yaml.safe_load(keyspace_file)
print(yaml.dump(keyspace, default_flow_style=True))

{input_draw: [602, 357, 946, 829, 650, 232, 394, 680, 629, 29, 223, 524], intervention.scenario: [
    baseline, wasting_treatment, sqlns], random_seed: [893, 466, 659, 963, 734, 321,
    35, 714, 867, 276, 197, 776, 473, 318, 449, 439, 827, 760, 837, 211, 871, 812,
    296, 623, 914, 877, 241, 771, 427, 141, 594, 797, 956, 740, 520, 50, 52, 874,
    513, 573, 744, 747, 558, 185, 522, 932, 718, 834, 245, 440, 940, 77, 974, 860,
    135, 315, 120, 136, 327, 666, 345, 653, 346, 88, 852, 0, 111, 959, 641, 562, 770,
    845, 182, 817, 233, 319, 557, 811, 793, 214, 160, 234, 584, 689, 875, 547, 373,
    192, 472, 767, 29, 567, 166, 842, 343, 391, 802, 637, 114, 668]}



In [4]:
keyspace['input_draw']

[602, 357, 946, 829, 650, 232, 394, 680, 629, 29, 223, 524]

In [5]:
len(keyspace['input_draw'])

12

In [6]:
sim_draw_names = [f'draw_{i}' for i in keyspace['input_draw']]
sim_draw_names

['draw_602',
 'draw_357',
 'draw_946',
 'draw_829',
 'draw_650',
 'draw_232',
 'draw_394',
 'draw_680',
 'draw_629',
 'draw_29',
 'draw_223',
 'draw_524']

# Load artifact data

In [7]:
artifact_path = '/ihme/costeffectiveness/artifacts/vivarium_ciff_sam/ethiopia.hdf'

In [8]:
# Open the artifact with two filter terms (year_start == 2019, age_end <= 5)
# and print it to list the keys it contains.
# NOTE(review): presumably the filter terms restrict every subsequent
# `art.load(...)` call to matching rows -- confirm against vivarium's Artifact docs.
art = Artifact(artifact_path, filter_terms=['year_start == 2019', 'age_end <= 5'])
print(art)

Artifact containing the following keys:
metadata
	keyspace
	locations
population
	location
	structure
	age_bins
	demographic_dimensions
	theoretical_minimum_risk_life_expectancy
cause
	all_causes
		cause_specific_mortality_rate
	diarrheal_diseases
		prevalence
		incidence_rate
		remission_rate
		disability_weight
		excess_mortality_rate
		cause_specific_mortality_rate
		restrictions
	measles
		prevalence
		incidence_rate
		disability_weight
		excess_mortality_rate
		cause_specific_mortality_rate
		restrictions
	lower_respiratory_infections
		prevalence
		incidence_rate
		remission_rate
		disability_weight
		excess_mortality_rate
		cause_specific_mortality_rate
		restrictions
	protein_energy_malnutrition
		excess_mortality_rate
		cause_specific_mortality_rate
		restrictions
	upper_respiratory_infections
		cause_specific_mortality_rate
	otitis_media
		cause_specific_mortality_rate
	meningitis
		cause_specific_mortality_rate
	encephalitis
		cause_specific_mortality_rate
	neonatal_preterm_

# Load LBWSG exposure from artifact

In [9]:
# Load the LBWSG exposure table from the artifact and display it
art_lbwsg_exposure = art.load('risk_factor.low_birth_weight_and_short_gestation.exposure')
art_lbwsg_exposure

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,draw_0,draw_1,draw_2,draw_3,draw_4,draw_5,draw_6,draw_7,draw_8,draw_9,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
sex,age_start,age_end,year_start,year_end,parameter,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Female,-1.000000,0.000000,2019,2020,cat10,0.001853,0.002055,0.001956,0.002313,0.002177,0.001907,0.002089,0.001901,0.002099,0.001958,...,0.002451,0.001634,0.002354,0.002130,0.001696,0.002240,0.002042,0.002173,0.002027,0.001853
Female,-1.000000,0.000000,2019,2020,cat106,0.000644,0.001027,0.000853,0.000686,0.000744,0.000646,0.000802,0.000537,0.000971,0.000921,...,0.000549,0.000648,0.000479,0.000635,0.000567,0.000646,0.000748,0.000634,0.000694,0.000644
Female,-1.000000,0.000000,2019,2020,cat11,0.001160,0.001022,0.001121,0.001271,0.001028,0.000873,0.000954,0.001114,0.001211,0.000858,...,0.001054,0.000659,0.000958,0.001097,0.000982,0.001161,0.000845,0.000897,0.000894,0.001160
Female,-1.000000,0.000000,2019,2020,cat116,0.001423,0.001317,0.001337,0.001481,0.000871,0.001025,0.001501,0.001185,0.000958,0.001586,...,0.001039,0.001013,0.001407,0.001252,0.001355,0.001491,0.001080,0.001052,0.001162,0.001423
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Male,0.019178,0.076712,2019,2020,cat89,0.001273,0.001098,0.001634,0.001312,0.001606,0.000928,0.001058,0.000821,0.001711,0.001374,...,0.001260,0.001230,0.001515,0.001387,0.001394,0.001160,0.001553,0.001290,0.001145,0.001273
Male,0.019178,0.076712,2019,2020,cat90,0.000562,0.000361,0.000175,0.000422,0.000753,0.001468,0.000355,0.000565,0.000327,0.000272,...,0.000383,0.000925,0.000354,0.000860,0.000355,0.000431,0.000526,0.000298,0.001050,0.000562
Male,0.019178,0.076712,2019,2020,cat95,0.003673,0.004023,0.005066,0.002657,0.004408,0.004279,0.004356,0.004306,0.003470,0.003993,...,0.002988,0.002671,0.003323,0.002007,0.002955,0.003355,0.002468,0.004712,0.002778,0.003673
Male,0.019178,0.076712,2019,2020,cat96,0.002109,0.002188,0.002040,0.001488,0.003236,0.001728,0.001275,0.003096,0.002630,0.004102,...,0.002284,0.003623,0.002825,0.002803,0.001529,0.001600,0.004588,0.001723,0.002780,0.002109


In [10]:
art_lbwsg_exposure.index.unique('age_end')

Float64Index([0.0, 0.01917808, 0.07671233], dtype='float64', name='age_end')

In [11]:
len(art_lbwsg_exposure.index.unique('parameter'))

58

In [12]:
3*58*2

348

In [13]:
art_lbwsg_exposure.loc[:,sim_draw_names]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,draw_602,draw_357,draw_946,draw_829,draw_650,draw_232,draw_394,draw_680,draw_629,draw_29,draw_223,draw_524
sex,age_start,age_end,year_start,year_end,parameter,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Female,-1.000000,0.000000,2019,2020,cat10,0.002234,0.002017,0.002135,0.002387,0.002044,0.002686,0.002325,0.002314,0.002128,0.001931,0.002374,0.001934
Female,-1.000000,0.000000,2019,2020,cat106,0.000826,0.000915,0.000776,0.000582,0.000946,0.000943,0.000741,0.000745,0.000871,0.000503,0.000929,0.000643
Female,-1.000000,0.000000,2019,2020,cat11,0.000971,0.001163,0.001234,0.001409,0.001139,0.001228,0.001313,0.001379,0.001374,0.001220,0.000922,0.001279
Female,-1.000000,0.000000,2019,2020,cat116,0.001105,0.001154,0.001073,0.001335,0.000884,0.001185,0.001301,0.001459,0.001519,0.001051,0.001209,0.001042
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Male,0.019178,0.076712,2019,2020,cat89,0.000903,0.001174,0.001767,0.000786,0.001998,0.000990,0.001379,0.000946,0.001156,0.001451,0.000599,0.001623
Male,0.019178,0.076712,2019,2020,cat90,0.000163,0.000179,0.000645,0.002092,0.000477,0.000222,0.000374,0.000409,0.000423,0.000148,0.000371,0.000912
Male,0.019178,0.076712,2019,2020,cat95,0.003102,0.002961,0.004614,0.002847,0.004280,0.003761,0.003217,0.003648,0.003967,0.003558,0.002848,0.004421
Male,0.019178,0.076712,2019,2020,cat96,0.003882,0.002000,0.002860,0.000896,0.001862,0.002199,0.002997,0.003756,0.001873,0.002125,0.003113,0.001723


# Load category data from my .csv file

In [14]:
!ls ../lbwsg/

lbwsg_category_data.csv


In [15]:
lbwsg_cat_data_filepath = '../lbwsg/lbwsg_category_data.csv'

In [16]:
cat_df = pd.read_csv(lbwsg_cat_data_filepath)
cat_df

Unnamed: 0,lbwsg_category,modelable_entity_id,modelable_entity_name,ga_start,ga_end,bw_start,bw_end,ga_interval,ga_width,ga_midpoint,bw_interval,bw_width,bw_midpoint
0,cat2,10755,"Birth prevalence - [0, 24) wks, [0, 500) g",0,24,0,500,"[0, 24)",24,12.0,"[0, 500)",500,250.0
1,cat8,10761,"Birth prevalence - [0, 24) wks, [500, 1000) g",0,24,500,1000,"[0, 24)",24,12.0,"[500, 1000)",500,750.0
2,cat10,10763,"Birth prevalence - [24, 26) wks, [500, 1000) g",24,26,500,1000,"[24, 26)",2,25.0,"[500, 1000)",500,750.0
3,cat11,10764,"Birth prevalence - [26, 28) wks, [500, 1000) g",26,28,500,1000,"[26, 28)",2,27.0,"[500, 1000)",500,750.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
54,cat116,20227,"Birth prevalence - [38, 40) wks, [1000, 1500) g",38,40,1000,1500,"[38, 40)",2,39.0,"[1000, 1500)",500,1250.0
55,cat117,20228,"Birth prevalence - [38, 40) wks, [1500, 2000) g",38,40,1500,2000,"[38, 40)",2,39.0,"[1500, 2000)",500,1750.0
56,cat123,20232,"Birth prevalence - [40, 42) wks, [1500, 2000) g",40,42,1500,2000,"[40, 42)",2,41.0,"[1500, 2000)",500,1750.0
57,cat124,20224,"Birth prevalence - [37, 38) wks, [1000, 1500) g",37,38,1000,1500,"[37, 38)",1,37.5,"[1000, 1500)",500,1250.0


# Test code for calculating mean birthweight from artifact exposure

In [17]:
mean_birthweight_by_cat = (
    cat_df
    .set_index('lbwsg_category')
    ['bw_midpoint']
)
mean_birthweight_by_cat

lbwsg_category
cat2       250.0
cat8       750.0
cat10      750.0
cat11      750.0
           ...  
cat116    1250.0
cat117    1750.0
cat123    1750.0
cat124    1250.0
Name: bw_midpoint, Length: 58, dtype: float64

In [18]:
lbwsg_exposure = (
    art_lbwsg_exposure.loc[:,sim_draw_names]
    .rename_axis(columns='draw')
    .rename_axis(index={'parameter':'lbwsg_category'})
    .stack('draw')
)
lbwsg_exposure

sex     age_start  age_end   year_start  year_end  lbwsg_category  draw    
Female  -1.000000  0.000000  2019        2020      cat10           draw_602    0.002234
                                                                   draw_357    0.002017
                                                                   draw_946    0.002135
                                                                   draw_829    0.002387
                                                                                 ...   
Male     0.019178  0.076712  2019        2020      cat96           draw_629    0.001873
                                                                   draw_29     0.002125
                                                                   draw_223    0.003113
                                                                   draw_524    0.001723
Length: 4176, dtype: float64

In [19]:
# Prevalence-weighted mean birthweight by category:
# each category's exposure (prevalence) times that category's birthweight midpoint.
# The two Series are aligned on the shared 'lbwsg_category' index level.
lbwsg_exposure * mean_birthweight_by_cat

sex     age_start  age_end   year_start  year_end  lbwsg_category  draw    
Female  -1.000000  0.000000  2019        2020      cat10           draw_602     1.675333
                                                                   draw_357     1.512467
                                                                   draw_946     1.601199
                                                                   draw_829     1.789944
                                                                                 ...    
Male     0.019178  0.076712  2019        2020      cat96           draw_629     7.022492
                                                                   draw_29      7.970098
                                                                   draw_223    11.674196
                                                                   draw_524     6.460206
Length: 4176, dtype: float64

# Write a function to calculate mean birthweight from artifact exposure

Mean birthweight for age group 'Birth', averaged over the 12 simulation draws:

- Female: 3248g
- Male: 3110g

In [20]:
def calculate_mean_birthweight(lbwsg_exposure, cat_df):
    """Calculates the mean birthweight according to the exposure distribution,
    assuming a uniform birthweight distribution within each LBWSG category.

    Under that assumption the mean birthweight within a category is the
    category's birthweight midpoint, so the overall mean is the
    exposure-weighted sum of the midpoints over all categories.

    Parameters
    ----------
    lbwsg_exposure : pandas.DataFrame
        LBWSG exposure data from the Artifact: one column per draw, and a row
        index that includes a 'parameter' level holding the LBWSG category.
    cat_df : pandas.DataFrame
        LBWSG category data (created by Nathaniel's functions) with at least
        the columns 'lbwsg_category' and 'bw_midpoint'.

    Returns
    -------
    pandas.DataFrame
        Mean birthweight (in the units of 'bw_midpoint'), one column per draw,
        indexed by every index level of `lbwsg_exposure` except 'parameter'.

    Raises
    ------
    ValueError
        If `lbwsg_exposure` contains a category with no row in `cat_df`.
        Such categories would otherwise become NaN in the product and be
        silently skipped by the sum, biasing the mean downward.
    """
    mean_birthweight_by_cat = (
        cat_df
        .set_index('lbwsg_category')
        ['bw_midpoint'] # mean is midpoint since we're assuming uniform distribution on each category
    )
    # Every category with exposure needs a midpoint, or the weighted sum is incomplete
    exposure_cats = lbwsg_exposure.index.unique('parameter')
    missing_cats = exposure_cats.difference(mean_birthweight_by_cat.index)
    if len(missing_cats) > 0:
        raise ValueError(
            f"No birthweight midpoint in cat_df for LBWSG categories: {list(missing_cats)}"
        )
    lbwsg_exposure = (
        lbwsg_exposure
        .rename_axis(index={'parameter':'lbwsg_category'}) # rename to match cat_df index
        .rename_axis(columns='draw')
        .stack('draw')
    )
    # get groupby columns to sum over categories (every index level except the category)
    sum_index_cols = [
        name for name in lbwsg_exposure.index.names if name != 'lbwsg_category'
    ]
    mean_birthweight = (
        (lbwsg_exposure * mean_birthweight_by_cat) # prevalence-weighted mean birthweight by category
        .groupby(sum_index_cols)
        .sum()
        .rename('mean_birthweight')
        .unstack('draw') # put back into format from artifact
    )
    return mean_birthweight

In [21]:
mean_birthweight = calculate_mean_birthweight(art_lbwsg_exposure, cat_df)
mean_birthweight

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw,draw_0,draw_1,draw_2,draw_3,draw_4,draw_5,draw_6,draw_7,draw_8,draw_9,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
sex,age_start,age_end,year_start,year_end,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Female,-1.0,0.0,2019,2020,3207.466882,3240.48422,3204.807766,3274.235004,3261.633846,3268.363535,3219.364022,3183.477796,3269.168882,3266.371886,...,3217.351212,3246.48156,3266.478641,3240.077594,3281.784997,3217.68435,3271.400685,3298.582203,3218.957031,3207.466882
Female,0.0,0.019178,2019,2020,3304.887402,3300.101741,3320.660663,3258.263104,3238.544428,3345.095717,3189.277261,3288.119197,3269.881841,3240.142201,...,3256.659799,3257.948891,3236.244215,3294.810983,3253.100645,3332.601739,3218.451526,3215.535106,3290.897574,3304.887402
Female,0.019178,0.076712,2019,2020,3292.292126,3279.801972,3284.969387,3308.435978,3246.280252,3358.200468,3269.865916,3372.34538,3235.509555,3251.240968,...,3265.15325,3335.366935,3312.088228,3291.953733,3290.882522,3322.997713,3265.097409,3322.730097,3286.132369,3292.292126
Male,-1.0,0.0,2019,2020,3090.153965,3124.925654,3090.471623,3085.313395,3108.014809,3144.523415,3108.212098,3116.657371,3106.18178,3086.25489,...,3098.781604,3097.604904,3100.250325,3155.437404,3133.504476,3101.920332,3108.81271,3146.572257,3136.232797,3090.153965
Male,0.0,0.019178,2019,2020,3119.418307,3142.156866,3155.481097,3130.18481,3129.084417,3123.143793,3080.771602,3113.313975,3117.767396,3089.173452,...,3126.745057,3116.584239,3137.49935,3142.801246,3127.404087,3096.494894,3088.348936,3121.33918,3150.970995,3119.418307
Male,0.019178,0.076712,2019,2020,3127.886328,3144.248732,3155.973987,3119.376957,3159.176775,3174.136425,3152.546825,3140.926586,3155.399508,3148.360511,...,3166.900689,3162.127979,3115.241779,3204.053641,3213.275507,3141.470163,3114.109486,3145.363255,3125.760677,3127.886328


In [22]:
mean_birthweight[sim_draw_names].T.describe(percentiles=[0.025,0.975]).T

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,mean,std,min,2.5%,50%,97.5%,max
sex,age_start,age_end,year_start,year_end,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Female,-1.0,0.0,2019,2020,12.0,3247.783629,38.695079,3207.24727,3208.087665,3245.692896,3324.805595,3342.343004
Female,0.0,0.019178,2019,2020,12.0,3278.520395,38.345125,3214.446715,3219.227147,3287.072297,3335.782313,3343.416434
Female,0.019178,0.076712,2019,2020,12.0,3284.373919,43.59873,3228.1759,3231.570689,3272.68773,3353.420114,3354.27429
Male,-1.0,0.0,2019,2020,12.0,3109.748259,53.795868,3061.900648,3064.262364,3095.410321,3227.474226,3259.131497
Male,0.0,0.019178,2019,2020,12.0,3129.693646,24.669046,3088.349304,3092.055682,3133.389377,3164.374418,3166.107007
Male,0.019178,0.076712,2019,2020,12.0,3137.168936,26.745219,3092.26635,3096.706676,3135.667608,3177.728483,3183.628767


In [23]:
mean_birthweight.T.describe(percentiles=[0.025,0.975]).T

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,mean,std,min,2.5%,50%,97.5%,max
sex,age_start,age_end,year_start,year_end,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Female,-1.0,0.0,2019,2020,1000.0,3247.48639,36.612015,3147.356241,3182.169971,3244.16619,3331.474932,3375.670593
Female,0.0,0.019178,2019,2020,1000.0,3270.996737,36.080679,3165.241867,3207.219889,3269.272656,3347.246817,3406.313364
Female,0.019178,0.076712,2019,2020,1000.0,3278.076982,36.296456,3194.675901,3218.531196,3273.800948,3356.694601,3446.924806
Male,-1.0,0.0,2019,2020,1000.0,3107.001921,30.262947,3037.776155,3056.845977,3104.615207,3175.148666,3259.131497
Male,0.0,0.019178,2019,2020,1000.0,3132.970076,28.639244,3064.392505,3085.817046,3130.966218,3196.241076,3253.91983
Male,0.019178,0.076712,2019,2020,1000.0,3144.061765,28.749923,3080.379119,3097.997827,3141.257265,3209.204526,3291.848325


# Test code to calculate prevalence of low birthweight from artifact data

In [24]:
low_bw = cat_df.bw_end <= 2500
low_bw_cats = cat_df.loc[low_bw, 'lbwsg_category']
low_bw_cats

0       cat2
1       cat8
2      cat10
3      cat11
       ...  
54    cat116
55    cat117
56    cat123
57    cat124
Name: lbwsg_category, Length: 30, dtype: object

In [25]:
idx = pd.IndexSlice
art_lbwsg_exposure.loc[idx[:,:,:,:,:,low_bw_cats]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,draw_0,draw_1,draw_2,draw_3,draw_4,draw_5,draw_6,draw_7,draw_8,draw_9,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
sex,age_start,age_end,year_start,year_end,parameter,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Female,-1.000000,0.000000,2019,2020,cat2,0.001290,0.001115,0.001021,0.001132,0.001015,0.000985,0.001149,0.001234,0.001015,0.000959,...,0.001010,0.001074,0.000933,0.001149,0.000787,0.000878,0.000803,0.001127,0.001179,0.001290
Female,0.000000,0.019178,2019,2020,cat2,0.000011,0.000018,0.000008,0.000010,0.000021,0.000031,0.000012,0.000014,0.000020,0.000016,...,0.000012,0.000030,0.000009,0.000014,0.000012,0.000029,0.000019,0.000012,0.000015,0.000011
Female,0.019178,0.076712,2019,2020,cat2,0.000017,0.000039,0.000013,0.000012,0.000015,0.000032,0.000050,0.000012,0.000022,0.000029,...,0.000024,0.000130,0.000034,0.000021,0.000017,0.000061,0.000048,0.000328,0.000019,0.000017
Male,-1.000000,0.000000,2019,2020,cat2,0.001199,0.000973,0.001284,0.001164,0.000988,0.001235,0.001169,0.001104,0.001087,0.001364,...,0.001034,0.001082,0.001397,0.001074,0.001455,0.001284,0.001355,0.001297,0.000953,0.001199
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Female,0.019178,0.076712,2019,2020,cat124,0.001626,0.001157,0.000886,0.000904,0.000950,0.001126,0.000846,0.000963,0.001142,0.000703,...,0.001446,0.001195,0.001057,0.001258,0.001115,0.001305,0.001301,0.000824,0.001451,0.001626
Male,-1.000000,0.000000,2019,2020,cat124,0.002388,0.001731,0.002139,0.001901,0.002290,0.002268,0.002095,0.001858,0.002442,0.002088,...,0.002624,0.002220,0.001659,0.002751,0.002142,0.001910,0.001938,0.001597,0.001701,0.002388
Male,0.000000,0.019178,2019,2020,cat124,0.001690,0.001932,0.002020,0.001485,0.001410,0.002576,0.001411,0.002181,0.001723,0.002081,...,0.001903,0.001542,0.001637,0.002316,0.001535,0.001275,0.001761,0.001850,0.001755,0.001690
Male,0.019178,0.076712,2019,2020,cat124,0.001692,0.001527,0.001391,0.001029,0.001363,0.001431,0.001473,0.001517,0.001384,0.001746,...,0.001752,0.001179,0.000894,0.001412,0.001683,0.001240,0.001520,0.001529,0.001307,0.001692


# Write function to calculate prevalence of low birthweight from artifact data

Mean values of low birthweight prevalence for age group 'Birth' in 12 simulation draws:

- Female: 9.1%
- Male: 12.4%

In [26]:
def calculate_low_birthweight_prevalence(lbwsg_exposure, cat_df, low_bw_cutoff=2500):
    """Calculates prevalence of births with birthweight below 2500g or some other cutoff.

    A category counts as low birthweight when its `bw_end` is <= `low_bw_cutoff`.
    Since the categories are half-open intervals [bw_start, bw_end), the result
    is the prevalence of birthweight strictly below the cutoff.
    The cutoff must be at one of the category boundaries used by GBD (otherwise the
    returned value will be the prevalence up to the next lowest cutoff).

    Parameters
    ----------
    lbwsg_exposure : pandas.DataFrame
        LBWSG exposure data from the Artifact, with a row index that includes
        a 'parameter' level holding the LBWSG category. The level is located
        by name, so its position in the index does not matter.
    cat_df : pandas.DataFrame
        LBWSG category data (created by Nathaniel's functions) with at least
        the columns 'lbwsg_category' and 'bw_end'.
    low_bw_cutoff : number, default 2500
        Upper birthweight bound, compared against `cat_df['bw_end']`.

    Returns
    -------
    pandas.DataFrame
        Exposure summed over the low birthweight categories, with the same
        columns as `lbwsg_exposure` and indexed by every index level of
        `lbwsg_exposure` except 'parameter'. Categories listed in `cat_df`
        but absent from `lbwsg_exposure` contribute nothing.
    """
    category_level = 'parameter'  # index level holding the LBWSG category name
    # get list of low birthweight categories
    is_low_bw_cat = cat_df['bw_end'] <= low_bw_cutoff
    low_bw_cats = cat_df.loc[is_low_bw_cat, 'lbwsg_category']
    # subset LBWSG exposure data to low birthweight categories, selecting on the
    # category level by name so the index need not have a fixed number/order of levels
    in_low_bw_cat = (
        lbwsg_exposure.index.get_level_values(category_level).isin(low_bw_cats)
    )
    low_bw_exposure = lbwsg_exposure.loc[in_low_bw_cat]
    # get groupby columns to sum over categories (every index level except the category)
    sum_index_cols = [
        name for name in lbwsg_exposure.index.names if name != category_level
    ]
    # sum over low birthweight categories to get overall LBW exposure
    low_bw_prevalence = low_bw_exposure.groupby(sum_index_cols).sum()
    return low_bw_prevalence

In [27]:
low_bw_prevalence = calculate_low_birthweight_prevalence(art_lbwsg_exposure, cat_df)
low_bw_prevalence

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_2,draw_3,draw_4,draw_5,draw_6,draw_7,draw_8,draw_9,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
sex,age_start,age_end,year_start,year_end,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Female,-1.0,0.0,2019,2020,0.099312,0.090108,0.096604,0.08352,0.092951,0.088534,0.10215,0.09781,0.089075,0.087994,...,0.099088,0.083019,0.085595,0.087196,0.086098,0.092752,0.085172,0.089668,0.085644,0.099312
Female,0.0,0.019178,2019,2020,0.08817,0.079393,0.080627,0.08268,0.087363,0.074933,0.092716,0.076232,0.090597,0.08111,...,0.078551,0.081882,0.087891,0.079619,0.08699,0.067924,0.097567,0.080377,0.08446,0.08817
Female,0.019178,0.076712,2019,2020,0.075644,0.079172,0.079087,0.073222,0.076893,0.073253,0.086823,0.070347,0.084035,0.079549,...,0.084591,0.080991,0.078973,0.080231,0.071434,0.075425,0.081106,0.077471,0.077627,0.075644
Male,-1.0,0.0,2019,2020,0.132917,0.127282,0.130556,0.126413,0.134208,0.124423,0.125955,0.121231,0.126052,0.128972,...,0.123742,0.126968,0.128484,0.124309,0.124392,0.130267,0.120286,0.119955,0.124079,0.132917
Male,0.0,0.019178,2019,2020,0.119974,0.108984,0.112501,0.118649,0.109317,0.121963,0.12112,0.11558,0.115683,0.121024,...,0.113478,0.109791,0.122217,0.114853,0.114343,0.114957,0.122922,0.123582,0.122941,0.119974
Male,0.019178,0.076712,2019,2020,0.109228,0.111334,0.107531,0.1029,0.108976,0.105097,0.107417,0.108487,0.109059,0.109441,...,0.103118,0.106953,0.110663,0.101433,0.099892,0.104594,0.105391,0.108785,0.110392,0.109228


In [28]:
low_bw_prevalence[sim_draw_names].T.describe(percentiles=[0.025,0.975]).T

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,mean,std,min,2.5%,50%,97.5%,max
sex,age_start,age_end,year_start,year_end,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Female,-1.0,0.0,2019,2020,12.0,0.090694,0.005042,0.083312,0.083897,0.08979,0.10022,0.101802
Female,0.0,0.019178,2019,2020,12.0,0.080356,0.006134,0.069607,0.070735,0.080659,0.090833,0.091609
Female,0.019178,0.076712,2019,2020,12.0,0.078318,0.005426,0.071299,0.071359,0.077197,0.086609,0.08722
Male,-1.0,0.0,2019,2020,12.0,0.12427,0.007213,0.10845,0.11077,0.12373,0.134232,0.135075
Male,0.0,0.019178,2019,2020,12.0,0.115887,0.004453,0.109863,0.110238,0.115507,0.123452,0.123612
Male,0.019178,0.076712,2019,2020,12.0,0.108625,0.005929,0.098548,0.098921,0.109867,0.115853,0.116215


In [29]:
low_bw_prevalence.T.describe(percentiles=[0.025,0.975]).T

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,mean,std,min,2.5%,50%,97.5%,max
sex,age_start,age_end,year_start,year_end,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Female,-1.0,0.0,2019,2020,1000.0,0.092677,0.005758,0.077184,0.081455,0.092637,0.103564,0.11202
Female,0.0,0.019178,2019,2020,1000.0,0.083254,0.005362,0.065365,0.073195,0.083107,0.094129,0.100946
Female,0.019178,0.076712,2019,2020,1000.0,0.079261,0.005124,0.063407,0.06989,0.07912,0.0891,0.096019
Male,-1.0,0.0,2019,2020,1000.0,0.126568,0.005535,0.108013,0.115356,0.126351,0.137294,0.144126
Male,0.0,0.019178,2019,2020,1000.0,0.114849,0.005144,0.096926,0.104903,0.114826,0.125055,0.133591
Male,0.019178,0.076712,2019,2020,1000.0,0.108298,0.004829,0.091206,0.098797,0.108291,0.117932,0.129573


# Verify that prevalences add to 1 in each draw since they don't in GBD

In [30]:
# Double-check that artifact prevalences add up to 1 since they don't in GBD
# Sum should be 6 = (2 sexes) x (3 age groups)
exposure_sums_by_draw = art_lbwsg_exposure.sum()
# Fail loudly (within floating point tolerance) rather than relying on eyeballing the output
np.testing.assert_allclose(exposure_sums_by_draw, 6)
exposure_sums_by_draw.unique()

array([6., 6., 6., 6., 6.])

In [31]:
# Slight variations due to floating point arithmetic
list(art_lbwsg_exposure.sum().unique())

[6.0,
 5.999999999999999,
 6.000000000000001,
 6.000000000000002,
 5.999999999999998]