In [1]:
import pandas as pd, numpy as np
from db_queries import get_ids, get_outputs, get_location_metadata, get_population, get_covariate_estimates
from get_draws.api import get_draws
import scipy.stats as sp
import scipy.integrate as integrate
import matplotlib.pyplot as plt
import random

In [2]:
# GBD location_ids included in this report (26 locations; id 1 is "Global").
location_ids = [
    1, 168, 161, 201, 202, 6, 205, 171, 141, 179, 207, 163, 11,
    180, 181, 184, 15, 164, 213, 214, 165, 196, 522, 190, 189, 20,
]

In [3]:
# Age groups that together form the under-five population, and the reporting year.
age_group_ids = [2, 3, 4, 5]
year_id = 2019

In [4]:
# Sequela IDs requested from COMO below; their draws are summed across sequelae
# and reported under the label 'Total iron responsive anemia'.
seqs = [1004, 1005, 1006, 1008, 1009, 1010, 1012, 1013, 1014, 1016, 1017, 1018, 1020, 1021, 1022, 1024, 1025, 1026, 1028, 1029, 1030, 1032, 1033, 1034, 1361, 1364, 1367, 1373, 1376, 1379, 1385, 1388, 1391, 1397, 1400, 1403, 1409, 1412, 1415, 1421, 1424, 1427, 1433, 1436, 1439, 1445, 1448, 1451, 5213, 5216, 5219, 5222, 5225, 5228, 5237, 5240, 5243, 5246, 5249, 5252, 5261, 5264, 5267, 5270, 5273, 5276,
  4985, 4988, 4991, 4994, 4997, 5000, 5009, 5012, 5015, 5678, 5681, 5684, 7214, 7217, 7220,
        4952, 4955, 4958, 4961, 4964, 4967, 4976, 4979, 4982, 5627, 5630, 5633, 7202, 7205, 7208,
        5393, 5396, 5399, 182, 183, 184, 240, 241, 242, 177, 178, 179, 144,145,146,172,173,174,525,526,527,1106,1107,1108,537,538,539,206,207,208,
        22989, 22990, 22991, 22992, 22993, 22999, 23000, 23001, 23002,
       23003, 23009, 23010, 23011, 23012, 23013,
       5567, 5570, 5573, 5579, 5582, 5585,
       23030, 23031, 23032, 23034, 23035, 23036, 23038, 23039, 23040,
       23042, 23043, 23044, 23046, 23047, 23048]
    

In [5]:
# Write the lookup rows for the selected sequelae to disk for reference.
s = get_ids('sequela')
selected_sequelae = s.loc[s['sequela_id'].isin(seqs)]
selected_sequelae.to_csv('out.csv')

# Under five, selected locations

In [6]:
# Pull COMO draws for every selected sequela in the under-five age groups.
# The year comes from the notebook-level `year_id` constant rather than a
# repeated literal, so the reporting year is defined in exactly one place.
ylds = get_draws(
    'sequela_id',
    seqs,
    source='como',
    location_id=location_ids,
    age_group_id=age_group_ids,
    year_id=year_id,
    measure_id=[3, 5],  # 3 is labelled 'ylds' and 5 'prevalence' in the next cell
    decomp_step='step5',
    gbd_round_id=6,
)
ylds.head()

Unnamed: 0,age_group_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_996,draw_997,draw_998,draw_999,location_id,measure_id,sequela_id,sex_id,year_id,metric_id
0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1,3,144,1,2019,3
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1,3,145,1,2019,3
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1,3,146,1,2019,3
3,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1,3,172,1,2019,3
4,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1,3,173,1,2019,3


In [7]:
# sum over causes
yld_sum = ylds.groupby(['location_id','sex_id','age_group_id','measure_id']).sum()
yld_sum = yld_sum.drop(columns=[c for c in yld_sum.columns if 'draw' not in c]).reset_index()
yld_sum['parameter'] = 'Total iron responsive anemia'
yld_sum['metric_id'] = 'rate'
yld_sum['measure'] = np.where(yld_sum.measure_id==3, 'ylds', 'prevalence')
yld_sum.head()

Unnamed: 0,location_id,sex_id,age_group_id,measure_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,parameter,metric_id,measure
0,1,1,2,3,0.003992,0.003677,0.002815,0.003291,0.002532,0.003511,...,0.002811,0.002758,0.00332,0.001557,0.002465,0.00254,0.002312,Total iron responsive anemia,rate,ylds
1,1,1,2,5,0.200743,0.19342,0.181837,0.181523,0.176887,0.186832,...,0.182029,0.18013,0.183981,0.198579,0.175099,0.186115,0.170032,Total iron responsive anemia,rate,prevalence
2,1,1,3,3,0.023294,0.022104,0.017402,0.022141,0.018155,0.020508,...,0.018417,0.018546,0.020763,0.009605,0.016105,0.016702,0.015815,Total iron responsive anemia,rate,ylds
3,1,1,3,5,0.695343,0.681188,0.6827,0.688939,0.685925,0.67337,...,0.682245,0.693141,0.663559,0.698901,0.687547,0.688596,0.693989,Total iron responsive anemia,rate,prevalence
4,1,1,4,3,0.013691,0.014288,0.011304,0.013703,0.009904,0.013039,...,0.011801,0.011901,0.012714,0.006323,0.009873,0.010061,0.010332,Total iron responsive anemia,rate,ylds


In [8]:
# Sex-specific population for the under-five age groups.
# `year_id` is passed explicitly so the denominators are guaranteed to be for the
# same year as the draws instead of depending on the library's default year.
# NOTE(review): decomp_step is 'step4' here while the draws use 'step5' - kept as-is; confirm it is intentional.
pop_sum = get_population(
    location_id=location_ids,
    age_group_id=age_group_ids,
    sex_id=[1, 2],
    year_id=year_id,
    gbd_round_id=6,
    decomp_step='step4',
).drop(columns=['year_id', 'run_id'])
pop_sum.head()

Unnamed: 0,age_group_id,location_id,sex_id,population
0,2,1,1,1330245.0
1,3,1,1,3951181.0
2,4,1,1,62923600.0
3,5,1,1,274193700.0
4,2,1,2,1241845.0


In [9]:
# Attach the matching population to every location / sex / age-group row.
counts = yld_sum.merge(pop_sum, on=['location_id', 'sex_id', 'age_group_id'])

# Rate -> count: scale every draw by its row's population in one vectorised step.
draw_cols = [f'draw_{i}' for i in range(1000)]
counts[draw_cols] = counts[draw_cols].mul(counts['population'], axis=0)
counts['metric_id'] = 'count'

# Sum over age groups and sexes to get the total under-five population
# (and total counts) for each location and measure.
collapsed = ['sex_id', 'population', 'age_group_id']
id_cols = [col for col in counts.columns if 'draw' not in col and col not in collapsed]
counts = counts.groupby(id_cols).sum()
counts = counts.drop(columns='age_group_id').reset_index()

# Relabel the aggregated rows: sex_id 3 / age_group_id 1 are later shown as
# 'Both' / 'Under Five' in the final table.
counts['sex_id'] = 3
counts['age_group_id'] = 1
counts.head()

Unnamed: 0,location_id,measure_id,parameter,metric_id,measure,sex_id,draw_0,draw_1,draw_10,draw_100,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,population,age_group_id
0,1,3,Total iron responsive anemia,count,ylds,3,7989029.0,7585250.0,5992380.0,7537664.0,...,6945814.0,6379412.0,6402699.0,7109910.0,3352733.0,5513215.0,5493529.0,5425313.0,662842700.0,1
1,1,5,Total iron responsive anemia,count,prevalence,3,229170800.0,227550000.0,224109300.0,228471500.0,...,223200300.0,229039100.0,227400000.0,221308200.0,229832300.0,227389100.0,225549500.0,230607300.0,662842700.0,1
2,6,3,Total iron responsive anemia,count,ylds,3,129995.2,121832.6,81966.72,153184.5,...,119766.1,119149.5,105021.2,125694.3,49556.7,120086.7,71964.13,121791.5,81490860.0,1
3,6,5,Total iron responsive anemia,count,prevalence,3,5748553.0,5493788.0,4823885.0,6568525.0,...,5795847.0,6313868.0,5724151.0,5886661.0,5292320.0,6792652.0,5018238.0,7482364.0,81490860.0,1
4,11,3,Total iron responsive anemia,count,ylds,3,65999.4,88533.37,61520.83,73129.06,...,67119.63,88049.08,68771.52,57963.16,23906.42,48632.8,52859.84,40983.23,19706350.0,1


In [10]:
# Count -> rate for the aggregated under-five rows: divide every draw by the
# summed population of the row.
rates = counts.copy()
draw_cols = [f'draw_{i}' for i in range(1000)]
rates[draw_cols] = rates[draw_cols].div(rates['population'], axis=0)
rates['metric_id'] = 'rate'
rates.head()

Unnamed: 0,location_id,measure_id,parameter,metric_id,measure,sex_id,draw_0,draw_1,draw_10,draw_100,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,population,age_group_id
0,1,3,Total iron responsive anemia,rate,ylds,3,0.012053,0.011444,0.00904,0.011372,...,0.010479,0.009624,0.009659,0.010726,0.005058,0.008318,0.008288,0.008185,662842700.0,1
1,1,5,Total iron responsive anemia,rate,prevalence,3,0.345739,0.343294,0.338103,0.344684,...,0.336732,0.345541,0.343068,0.333877,0.346737,0.343051,0.340276,0.347906,662842700.0,1
2,6,3,Total iron responsive anemia,rate,ylds,3,0.001595,0.001495,0.001006,0.00188,...,0.00147,0.001462,0.001289,0.001542,0.000608,0.001474,0.000883,0.001495,81490860.0,1
3,6,5,Total iron responsive anemia,rate,prevalence,3,0.070542,0.067416,0.059195,0.080604,...,0.071123,0.077479,0.070243,0.072237,0.064944,0.083355,0.06158,0.091818,81490860.0,1
4,11,3,Total iron responsive anemia,rate,ylds,3,0.003349,0.004493,0.003122,0.003711,...,0.003406,0.004468,0.00349,0.002941,0.001213,0.002468,0.002682,0.00208,19706350.0,1


In [11]:
# Location id -> name lookup, restricted to the locations in this report.
l = get_ids('location')
in_report = l['location_id'].isin(location_ids)
l = l.loc[in_report, ['location_id', 'location_name']]

In [12]:
# Stack rates and counts, add location names, then summarise the draws of each
# row as mean and 2.5th / 97.5th percentiles.
ira = pd.concat([rates, counts], ignore_index=True)
ira = ira.merge(l, on='location_id')
ira = ira.drop(columns='population')

# Move every identifying column into the index so only draw columns remain.
id_cols = [col for col in ira.columns if 'draw' not in col]
ira = ira.set_index(id_cols)

ira = ira.apply(lambda draws: draws.describe(percentiles=[0.025, 0.975]), axis=1)
ira = ira.rename(columns={'2.5%': 'lower', '97.5%': 'upper'})
ira = ira[['mean', 'lower', 'upper']].reset_index()
ira.head()

Unnamed: 0,location_id,measure_id,parameter,metric_id,measure,sex_id,age_group_id,location_name,mean,lower,upper
0,1,3,Total iron responsive anemia,rate,ylds,3,1,Global,0.01030491,0.006890595,0.01491708
1,1,5,Total iron responsive anemia,rate,prevalence,3,1,Global,0.3412452,0.3332895,0.3490746
2,1,3,Total iron responsive anemia,count,ylds,3,1,Global,6830531.0,4567381.0,9887679.0
3,1,5,Total iron responsive anemia,count,prevalence,3,1,Global,226191900.0,220918500.0,231381600.0
4,6,3,Total iron responsive anemia,rate,ylds,3,1,China,0.001546049,0.000868757,0.002448056



# Women of reproductive age in selected locations

In [13]:
ages = list(range(7, 16))  # age_group_ids 7 through 15, i.e. ages 10-54

In [14]:
# Pull COMO draws for the same sequelae, restricted to females (sex_id 2) in
# the reproductive-age groups listed in `ages`.
# The year comes from the notebook-level `year_id` constant rather than a
# repeated literal, so the reporting year is defined in exactly one place.
ylds_wra = get_draws(
    'sequela_id',
    seqs,
    source='como',
    location_id=location_ids,
    age_group_id=ages,
    year_id=year_id,
    measure_id=[3, 5],  # 3 is labelled 'ylds' and 5 'prevalence' in the next cell
    sex_id=2,
    decomp_step='step5',
    gbd_round_id=6,
)
ylds_wra.head()

Unnamed: 0,age_group_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_996,draw_997,draw_998,draw_999,location_id,measure_id,sequela_id,sex_id,year_id,metric_id
0,7,6.285875e-07,8.876883e-07,9.279934e-07,1e-06,4.709083e-07,6.699409e-07,6.914293e-07,8.533791e-07,3e-06,...,2.631115e-07,7.421502e-07,9.53951e-07,4.73502e-07,1,3,144,2,2019,3
1,7,4.427225e-05,4.47821e-05,3.17389e-05,3.9e-05,3.391062e-05,3.574721e-05,4.206319e-05,2.975047e-05,8.2e-05,...,2.400067e-05,3.669814e-05,4.859811e-05,3.856409e-05,1,3,145,2,2019,3
2,7,1.177433e-05,6.095171e-06,4.95782e-06,6e-06,9.284979e-06,7.864537e-06,7.005875e-06,7.034097e-06,1.4e-05,...,9.139631e-06,9.520524e-06,8.167625e-06,9.370701e-06,1,3,146,2,2019,3
3,7,9.613952e-07,1.423029e-06,1.502879e-06,2e-06,8.141798e-07,1.390992e-06,1.016309e-06,1.790696e-06,3e-06,...,4.290841e-07,9.971721e-07,1.028192e-06,6.694276e-07,1,3,172,2,2019,3
4,7,7.64916e-05,8.35395e-05,5.894396e-05,7.1e-05,6.369756e-05,7.58194e-05,7.990197e-05,5.999081e-05,8.3e-05,...,3.501279e-05,5.610574e-05,5.675986e-05,4.886782e-05,1,3,173,2,2019,3


In [15]:
# sum over causes
yld_sum_wra = ylds_wra.groupby(['location_id','sex_id','age_group_id','measure_id']).sum()
yld_sum_wra = yld_sum_wra.drop(columns=[c for c in yld_sum_wra.columns if 'draw' not in c]).reset_index()
yld_sum_wra['parameter'] = 'Total iron responsive anemia'
yld_sum_wra['metric_id'] = 'rate'
yld_sum_wra['measure'] = np.where(yld_sum_wra.measure_id==3, 'ylds', 'prevalence')
yld_sum_wra.head()

Unnamed: 0,location_id,sex_id,age_group_id,measure_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,parameter,metric_id,measure
0,1,2,7,3,0.008585,0.007903,0.006434,0.00752,0.006713,0.007743,...,0.007079,0.006797,0.008402,0.003705,0.005678,0.005986,0.005899,Total iron responsive anemia,rate,ylds
1,1,2,7,5,0.182192,0.181377,0.180771,0.169972,0.184604,0.184583,...,0.187327,0.178589,0.189536,0.184504,0.171154,0.182053,0.184581,Total iron responsive anemia,rate,prevalence
2,1,2,8,3,0.008251,0.006903,0.006196,0.007498,0.006216,0.007072,...,0.006516,0.006692,0.007272,0.003401,0.005331,0.005476,0.005628,Total iron responsive anemia,rate,ylds
3,1,2,8,5,0.232165,0.218208,0.224901,0.217363,0.226183,0.222091,...,0.222313,0.224496,0.219031,0.21769,0.216504,0.217543,0.220903,Total iron responsive anemia,rate,prevalence
4,1,2,9,3,0.006979,0.006608,0.005435,0.006785,0.005331,0.006047,...,0.005518,0.006078,0.006147,0.003117,0.004829,0.00509,0.005003,Total iron responsive anemia,rate,ylds


In [16]:
# Female population for each reproductive-age group, for the reporting year.
# Uses the notebook-level `year_id` constant instead of a repeated literal so
# the population year cannot drift away from the year used for the draws.
pop = get_population(
    location_id=location_ids,
    age_group_id=ages,
    sex_id=2,
    year_id=year_id,
    gbd_round_id=6,
    decomp_step='step4',
).drop(columns=['year_id', 'run_id'])
pop.head()

Unnamed: 0,age_group_id,location_id,sex_id,population
0,7,1,2,310852500.0
1,8,1,2,301758900.0
2,9,1,2,295776200.0
3,10,1,2,300693100.0
4,11,1,2,298557800.0


In [17]:
# NOTE: from here on `counts` holds the reproductive-age results and replaces
# the under-five frame of the same name.
counts = yld_sum_wra.merge(pop, on=['location_id', 'sex_id', 'age_group_id'])

# Rate -> count: scale every draw by its row's population in one vectorised step.
draw_cols = [f'draw_{i}' for i in range(1000)]
counts[draw_cols] = counts[draw_cols].mul(counts['population'], axis=0)
counts['metric_id'] = 'count'

# Sum over age groups to get the total reproductive-age population (and total
# counts) for each location and measure.
collapsed = ['population', 'age_group_id']
id_cols = [col for col in counts.columns if 'draw' not in col and col not in collapsed]
counts = counts.groupby(id_cols).sum()
counts = counts.drop(columns='age_group_id').reset_index()

# 169 is the age_group_id this notebook uses to tag the aggregated 10-54 rows.
counts['age_group_id'] = 169
counts.head()

Unnamed: 0,location_id,sex_id,measure_id,parameter,metric_id,measure,draw_0,draw_1,draw_10,draw_100,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,population,age_group_id
0,1,2,3,Total iron responsive anemia,count,ylds,18595130.0,17394720.0,14291480.0,17586370.0,...,17029680.0,15106840.0,15938900.0,16913350.0,8462072.0,13304880.0,13381310.0,13502150.0,2475433000.0,169
1,1,2,5,Total iron responsive anemia,count,prevalence,487765700.0,484966800.0,480292700.0,482811800.0,...,491653300.0,486302600.0,493167300.0,469735200.0,490012900.0,485382300.0,494630500.0,496992000.0,2475433000.0,169
2,6,2,3,Total iron responsive anemia,count,ylds,954788.0,965628.9,676798.6,929737.1,...,1230936.0,891797.9,958974.7,823174.8,478629.9,867588.0,731295.4,903698.2,445693800.0,169
3,6,2,5,Total iron responsive anemia,count,prevalence,31560620.0,32954480.0,28525590.0,31215150.0,...,40158820.0,34586140.0,35087800.0,28067640.0,33887840.0,36715150.0,32322810.0,38674510.0,445693800.0,169
4,11,2,3,Total iron responsive anemia,count,ylds,722398.3,608774.7,482338.2,636543.5,...,518412.0,508028.6,554473.8,582916.3,317309.1,418013.4,390220.8,511900.4,89213350.0,169


In [18]:
# Count -> rate for the aggregated reproductive-age rows: divide every draw by
# the summed population of the row. (`rates` now replaces the under-five frame.)
rates = counts.copy()
draw_cols = [f'draw_{i}' for i in range(1000)]
rates[draw_cols] = rates[draw_cols].div(rates['population'], axis=0)
rates['metric_id'] = 'rate'
rates.head()

Unnamed: 0,location_id,sex_id,measure_id,parameter,metric_id,measure,draw_0,draw_1,draw_10,draw_100,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,population,age_group_id
0,1,2,3,Total iron responsive anemia,rate,ylds,0.007512,0.007027,0.005773,0.007104,...,0.006879,0.006103,0.006439,0.006832,0.003418,0.005375,0.005406,0.005454,2475433000.0,169
1,1,2,5,Total iron responsive anemia,rate,prevalence,0.197043,0.195912,0.194024,0.195041,...,0.198613,0.196452,0.199225,0.189759,0.19795,0.19608,0.199816,0.20077,2475433000.0,169
2,6,2,3,Total iron responsive anemia,rate,ylds,0.002142,0.002167,0.001519,0.002086,...,0.002762,0.002001,0.002152,0.001847,0.001074,0.001947,0.001641,0.002028,445693800.0,169
3,6,2,5,Total iron responsive anemia,rate,prevalence,0.070812,0.07394,0.064003,0.070037,...,0.090104,0.077601,0.078726,0.062975,0.076034,0.082378,0.072522,0.086774,445693800.0,169
4,11,2,3,Total iron responsive anemia,rate,ylds,0.008097,0.006824,0.005407,0.007135,...,0.005811,0.005695,0.006215,0.006534,0.003557,0.004686,0.004374,0.005738,89213350.0,169


In [19]:
# Stack rates and counts, add location names, then summarise the draws of each
# row as mean and 2.5th / 97.5th percentiles.
ira_wra = pd.concat([rates, counts], ignore_index=True)
ira_wra = ira_wra.merge(l, on='location_id')
ira_wra = ira_wra.drop(columns='population')

# Move every identifying column into the index so only draw columns remain.
id_cols = [col for col in ira_wra.columns if 'draw' not in col]
ira_wra = ira_wra.set_index(id_cols)

ira_wra = ira_wra.apply(lambda draws: draws.describe(percentiles=[0.025, 0.975]), axis=1)
ira_wra = ira_wra.rename(columns={'2.5%': 'lower', '97.5%': 'upper'})
ira_wra = ira_wra[['mean', 'lower', 'upper']].reset_index()
ira_wra.head()

Unnamed: 0,location_id,sex_id,measure_id,parameter,metric_id,measure,age_group_id,location_name,mean,lower,upper
0,1,2,3,Total iron responsive anemia,rate,ylds,169,Global,0.006488779,0.004487159,0.009090281
1,1,2,5,Total iron responsive anemia,rate,prevalence,169,Global,0.1954898,0.1905729,0.2001988
2,1,2,3,Total iron responsive anemia,count,ylds,169,Global,16062540.0,11107660.0,22502380.0
3,1,2,5,Total iron responsive anemia,count,prevalence,169,Global,483922000.0,471750300.0,495578700.0
4,6,2,3,Total iron responsive anemia,rate,ylds,169,China,0.001997833,0.001313038,0.00283952


In [20]:
# Column order shared by every exported summary table.
final_cols = [
    'parameter',
    'measure_id',
    'measure',
    'metric_id',
    'location_id',
    'age_group_id',
    'sex_id',
    'location_name',
    'sex',
    'age',
    'mean',
    'lower',
    'upper',
]

In [21]:
# Combine the reproductive-age and under-five summaries into one export table.
# Expects `ira_wra` and `ira` to be the summarised frames (mean / lower / upper).
# sort=False: the two frames carry the same columns in a different order; being
# explicit avoids the pandas FutureWarning, and the column order is fixed by
# `final_cols` below anyway.
final_ira = pd.concat([ira_wra, ira], ignore_index=True, sort=False)

# Human-readable labels derived from the id columns.
final_ira['sex'] = np.where(final_ira.sex_id == 3, 'Both', 'Female')
final_ira['age'] = np.where(final_ira.age_group_id == 1, 'Under Five', 'Reproductive age')
# Fixed typo: this used to write a stray 'measire' column instead of 'measure'.
final_ira['measure'] = np.where(final_ira.measure_id == 3, 'ylds', 'prevalence')

final_ira = final_ira[final_cols]
final_ira.to_csv('ira_anemia_prevalence_and_counts.csv')
final_ira.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,parameter,measure_id,measure,metric_id,location_id,age_group_id,sex_id,location_name,sex,age,mean,lower,upper
0,Total iron responsive anemia,3,ylds,rate,1,169,2,Global,Female,Reproductive age,0.006488779,0.004487159,0.009090281
1,Total iron responsive anemia,5,prevalence,rate,1,169,2,Global,Female,Reproductive age,0.1954898,0.1905729,0.2001988
2,Total iron responsive anemia,3,ylds,count,1,169,2,Global,Female,Reproductive age,16062540.0,11107660.0,22502380.0
3,Total iron responsive anemia,5,prevalence,count,1,169,2,Global,Female,Reproductive age,483922000.0,471750300.0,495578700.0
4,Total iron responsive anemia,3,ylds,rate,6,169,2,China,Female,Reproductive age,0.001997833,0.001313038,0.00283952


# Burdenator estimates

In [22]:
# Burdenator draws: DALY counts for maternal disorders attributable to iron
# deficiency, for females in the reproductive-age groups.
# The year comes from the notebook-level `year_id` constant rather than a
# repeated literal, so the reporting year is defined in exactly one place.
mat_burden_raw = get_draws(
    gbd_id_type=['rei_id', 'cause_id'],  # Types must match gbd_id's
    gbd_id=[95, 366],  # iron def, maternal disorders
    source='burdenator',
    measure_id=2,  # dalys
    metric_id=1,  # number
    location_id=location_ids,
    year_id=year_id,
    age_group_id=ages,
    sex_id=[2],  # Sex aggregates not available
    gbd_round_id=6,
    status='best',
    decomp_step='step5',
)
mat_burden_raw.head()

Unnamed: 0,age_group_id,cause_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,...,draw_996,draw_997,draw_998,draw_999,location_id,measure_id,metric_id,rei_id,sex_id,year_id
0,7,366,12119.604302,19443.432191,21611.427124,18809.844215,10733.733228,19474.288146,12500.252382,11760.220344,...,15615.958072,10881.233184,13415.293314,11694.688854,1,2,1,95,2,2019
1,8,366,271423.538755,355678.383836,415699.873912,455242.618361,270196.657577,383776.024252,274926.282206,245097.904329,...,370377.975344,284552.059279,353873.962396,334168.193292,1,2,1,95,2,2019
2,9,366,528179.721962,746531.089708,743630.874715,847137.384992,456141.967843,718647.096414,496015.03776,459468.98671,...,712618.452515,524767.305914,677203.297696,630987.767523,1,2,1,95,2,2019
3,10,366,540937.46803,707847.445982,741599.644674,868017.788149,485108.268665,684967.264005,512155.899508,431129.990883,...,691619.470708,504820.282674,652379.909436,562515.779181,1,2,1,95,2,2019
4,11,366,493040.436822,631029.62358,607754.732527,733541.787587,413441.757547,604503.303875,433105.613628,381073.319945,...,600942.434636,453490.369242,563925.377097,489195.422826,1,2,1,95,2,2019


In [33]:
# Sum the DALY count draws over the reproductive-age groups, leaving one row
# per location (all other id columns are grouping keys).
burden_keys = ['location_id', 'measure_id', 'metric_id', 'rei_id', 'year_id', 'cause_id', 'sex_id']
mat_burden = mat_burden_raw.groupby(burden_keys, as_index=False).sum()
mat_burden = mat_burden.drop(columns=['age_group_id'])

# Replace the numeric metric id with the text label used in the other tables.
mat_burden['metric_id'] = 'count'
mat_burden.head()

Unnamed: 0,location_id,measure_id,metric_id,rei_id,year_id,cause_id,sex_id,draw_0,draw_1,draw_10,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
0,1,2,count,95,2019,366,2,2586464.0,3412007.0,3510536.0,...,1953944.0,3362882.0,3509615.0,2737190.0,2240182.0,2131786.0,3293110.0,2447958.0,3063591.0,2759295.0
1,6,2,count,95,2019,366,2,15925.59,23450.98,20398.04,...,13962.1,23831.12,23020.99,19737.67,17088.51,13578.45,18330.7,16730.02,18855.1,20334.03
2,11,2,count,95,2019,366,2,45628.46,57081.46,69673.49,...,28015.93,65433.4,61512.75,48106.79,57273.9,39656.46,71653.24,45807.08,68768.84,65918.41
3,15,2,count,95,2019,366,2,21835.73,26166.85,26177.62,...,13042.11,20503.26,28336.61,22833.52,17778.82,18344.45,22648.13,19718.36,19729.38,14709.44
4,20,2,count,95,2019,366,2,2404.539,3950.666,3812.177,...,1685.713,3426.143,2869.611,2874.473,2326.428,2417.602,2640.399,2505.255,3434.226,2660.9


In [34]:
# Total reproductive-age female population per location (summed over age groups).
wra_pop = pop.groupby(['location_id', 'sex_id'], as_index=False).sum()
wra_pop = wra_pop.drop(columns='age_group_id')

# Count -> rate: divide every DALY draw by that population.
mat_rates = mat_burden.merge(wra_pop, on=['location_id', 'sex_id'])
draw_cols = [f'draw_{i}' for i in range(1000)]
mat_rates[draw_cols] = mat_rates[draw_cols].div(mat_rates['population'], axis=0)
mat_rates = mat_rates.drop(columns='population')
mat_rates['metric_id'] = 'rate'
mat_rates.head()

Unnamed: 0,location_id,measure_id,metric_id,rei_id,year_id,cause_id,sex_id,draw_0,draw_1,draw_10,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
0,1,2,rate,95,2019,366,2,0.001045,0.001378,0.001418,...,0.000789,0.001359,0.001418,0.001106,0.000905,0.000861,0.00133,0.000989,0.001238,0.001115
1,6,2,rate,95,2019,366,2,3.6e-05,5.3e-05,4.6e-05,...,3.1e-05,5.3e-05,5.2e-05,4.4e-05,3.8e-05,3e-05,4.1e-05,3.8e-05,4.2e-05,4.6e-05
2,11,2,rate,95,2019,366,2,0.000511,0.00064,0.000781,...,0.000314,0.000733,0.00069,0.000539,0.000642,0.000445,0.000803,0.000513,0.000771,0.000739
3,15,2,rate,95,2019,366,2,0.001149,0.001377,0.001377,...,0.000686,0.001079,0.001491,0.001201,0.000936,0.000965,0.001192,0.001038,0.001038,0.000774
4,20,2,rate,95,2019,366,2,7.4e-05,0.000121,0.000117,...,5.2e-05,0.000105,8.8e-05,8.8e-05,7.1e-05,7.4e-05,8.1e-05,7.7e-05,0.000105,8.2e-05


In [37]:
# Summary table (mean and 95% interval) of maternal-disorder DALYs attributable
# to iron deficiency, as counts and rates.
mat_dalys = pd.concat([mat_burden, mat_rates], ignore_index=True)
mat_dalys = mat_dalys.drop(columns='rei_id').merge(l, on='location_id')

# Constant descriptive labels for every row.
row_labels = {
    'measure': 'dalys',
    'parameter': 'Maternal disorder burden attributable to iron deficiency',
    'sex': 'female',
    'age': 'Reproductive age',
    'age_group_id': 169,
}
for column, value in row_labels.items():
    mat_dalys[column] = value

# Move every identifying column into the index so only draw columns remain,
# then summarise each row's draws.
id_cols = [col for col in mat_dalys.columns if 'draw' not in col]
mat_dalys = mat_dalys.set_index(id_cols)
mat_dalys = mat_dalys.apply(lambda draws: draws.describe(percentiles=[0.025, 0.975]), axis=1)
mat_dalys = mat_dalys.reset_index().rename(columns={'2.5%': 'lower', '97.5%': 'upper'})
mat_dalys = mat_dalys[final_cols]
# Export intentionally left disabled:
#mat_dalys.to_csv('maternal_disorder_burden.csv')
mat_dalys.head()

Unnamed: 0,parameter,measure_id,measure,metric_id,location_id,age_group_id,sex_id,location_name,sex,age,mean,lower,upper
0,Maternal disorder burden attributable to iron ...,2,dalys,count,1,169,2,Global,female,Reproductive age,2728445.0,977285.435299,4524662.0
1,Maternal disorder burden attributable to iron ...,2,dalys,rate,1,169,2,Global,female,Reproductive age,0.001102209,0.000395,0.001827827
2,Maternal disorder burden attributable to iron ...,2,dalys,count,6,169,2,China,female,Reproductive age,17558.76,5961.172412,31155.21
3,Maternal disorder burden attributable to iron ...,2,dalys,rate,6,169,2,China,female,Reproductive age,3.939647e-05,1.3e-05,6.990271e-05
4,Maternal disorder burden attributable to iron ...,2,dalys,count,11,169,2,Indonesia,female,Reproductive age,49593.63,17455.589311,85531.33


In [26]:
# Sanity check: both metrics should be present.
mat_dalys['metric_id'].unique()

array(['count', 'rate'], dtype=object)

In [27]:
# Rebuild the reproductive-age frame at draw level (no summarising) so its
# draws can be added to the maternal-disorder draws further down.
ira_wra = (
    pd.concat([rates, counts], ignore_index=True)
    .merge(l, on='location_id')
    .drop(columns='population')
)
ira_wra.head()

Unnamed: 0,location_id,sex_id,measure_id,parameter,metric_id,measure,draw_0,draw_1,draw_10,draw_100,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,age_group_id,location_name
0,1,2,3,Total iron responsive anemia,rate,ylds,0.007511872,0.007026941,0.005773325,0.00710436,...,0.006879478,0.006102708,0.006438835,0.006832483,0.003418422,0.00537477,0.005405647,0.005454462,169,Global
1,1,2,5,Total iron responsive anemia,rate,prevalence,0.1970426,0.1959119,0.1940237,0.1950414,...,0.1986131,0.1964515,0.1992247,0.1897588,0.1979504,0.1960798,0.1998158,0.2007697,169,Global
2,1,2,3,Total iron responsive anemia,count,ylds,18595130.0,17394720.0,14291480.0,17586370.0,...,17029680.0,15106840.0,15938900.0,16913350.0,8462072.0,13304880.0,13381310.0,13502150.0,169,Global
3,1,2,5,Total iron responsive anemia,count,prevalence,487765700.0,484966800.0,480292700.0,482811800.0,...,491653300.0,486302600.0,493167300.0,469735200.0,490012900.0,485382300.0,494630500.0,496992000.0,169,Global
4,6,2,3,Total iron responsive anemia,rate,ylds,0.002142251,0.002166575,0.001518528,0.002086044,...,0.002761842,0.00200092,0.002151645,0.001846951,0.001073899,0.001946601,0.001640802,0.002027621,169,China


In [38]:
# Rebuild the maternal-disorder frame at draw level (no summarising); this
# replaces the summarised `mat_dalys` created earlier.
mat_dalys = pd.concat([mat_burden, mat_rates], ignore_index=True)
mat_dalys = mat_dalys.drop(columns='rei_id').merge(l, on='location_id')

# Constant descriptive labels for every row.
row_labels = {
    'measure': 'dalys',
    'parameter': 'Maternal disorder burden attributable to iron deficiency',
    'sex': 'female',
    'age': 'Reproductive age',
    'age_group_id': 169,
}
for column, value in row_labels.items():
    mat_dalys[column] = value
mat_dalys.head()

Unnamed: 0,location_id,measure_id,metric_id,year_id,cause_id,sex_id,draw_0,draw_1,draw_10,draw_100,...,draw_996,draw_997,draw_998,draw_999,location_name,measure,parameter,sex,age,age_group_id
0,1,2,count,2019,366,2,2586464.0,3412007.0,3510536.0,4085623.0,...,3293110.0,2447958.0,3063591.0,2759295.0,Global,dalys,Maternal disorder burden attributable to iron ...,female,Reproductive age,169
1,1,2,rate,2019,366,2,0.001044853,0.001378348,0.00141815,0.001650468,...,0.001330317,0.000988901,0.001237598,0.001114672,Global,dalys,Maternal disorder burden attributable to iron ...,female,Reproductive age,169
2,6,2,count,2019,366,2,15925.59,23450.98,20398.04,27139.24,...,18330.7,16730.02,18855.1,20334.03,China,dalys,Maternal disorder burden attributable to iron ...,female,Reproductive age,169
3,6,2,rate,2019,366,2,3.573213e-05,5.261679e-05,4.576694e-05,6.089211e-05,...,4.112847e-05,3.753702e-05,4.230506e-05,4.562332e-05,China,dalys,Maternal disorder burden attributable to iron ...,female,Reproductive age,169
4,11,2,count,2019,366,2,45628.46,57081.46,69673.49,70991.65,...,71653.24,45807.08,68768.84,65918.41,Indonesia,dalys,Maternal disorder burden attributable to iron ...,female,Reproductive age,169


In [44]:
# Identifying (non-draw) columns of the maternal-disorder frame.
[name for name in mat_dalys.columns if 'draw' not in name]

['location_id',
 'measure_id',
 'metric_id',
 'year_id',
 'cause_id',
 'sex_id',
 'location_name',
 'measure',
 'parameter',
 'sex',
 'age',
 'age_group_id']

In [45]:
# `age` and `sex` are text label columns and are used as grouping keys when this
# frame is combined with `mat_dalys`, so they must hold the same strings that
# `mat_dalys` uses. Previously the numeric ids (169 / 2) were written here,
# which would stop the two frames' rows from falling into the same groups.
ira_wra['age'] = 'Reproductive age'
ira_wra['sex'] = 'female'
[c for c in ira_wra.columns if 'draw' not in c]

['location_id',
 'sex_id',
 'measure_id',
 'parameter',
 'metric_id',
 'measure',
 'age_group_id',
 'location_name',
 'age',
 'sex']

In [79]:
# Total iron-deficiency burden = anemia YLDs + maternal-disorder DALYs, added
# draw by draw. Expects `mat_dalys` and `ira_wra` to be the draw-level frames.
# Make sure the label columns match between the two frames (they are grouping keys).
ira_wra['age'] = 'Reproductive age'
ira_wra['sex'] = 'female'

# sort=False: the frames have different column sets/orders; being explicit
# avoids the pandas FutureWarning and keeps behaviour stable across versions.
tot_iron = pd.concat([mat_dalys, ira_wra], sort=False).drop(columns=['year_id', 'cause_id'])
tot_iron['parameter'] = 'Total iron deficiency burden (including anemia and maternal disorders)'

# Drop prevalence rows (measure_id 5): only YLDs and DALYs are summed.
tot_iron = tot_iron.loc[tot_iron.measure_id != 5]

# Sum only the draw columns within each group. The previous blanket .sum() also
# added up measure_id and depended on how pandas treats the text `measure`
# column; both columns are overwritten below in any case.
group_keys = ['parameter', 'metric_id', 'location_id', 'age_group_id', 'sex_id', 'sex', 'location_name', 'age']
draw_cols = [c for c in tot_iron.columns if 'draw' in c]
tot_iron = tot_iron.groupby(group_keys)[draw_cols].sum().reset_index()
tot_iron['measure_id'] = 2
tot_iron['measure'] = 'dalys'

# Summarise each row's draws as mean and 2.5th / 97.5th percentiles.
tot_iron = tot_iron.set_index([c for c in tot_iron.columns if 'draw' not in c])
tot_iron = tot_iron.apply(pd.DataFrame.describe, percentiles=[0.025, 0.975], axis=1).reset_index().rename(columns={'2.5%': 'lower', '97.5%': 'upper'})
tot_iron = tot_iron[final_cols]
tot_iron.to_csv('total_iron_burden.csv')
tot_iron.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,parameter,measure_id,measure,metric_id,location_id,age_group_id,sex_id,location_name,sex,age,mean,lower,upper
0,Total iron deficiency burden (including anemia...,2,dalys,count,1,169,2,Global,female,Reproductive age,18790980.0,13050120.0,25733650.0
1,Total iron deficiency burden (including anemia...,2,dalys,count,6,169,2,China,female,Reproductive age,907980.5,602398.9,1288013.0
2,Total iron deficiency burden (including anemia...,2,dalys,count,11,169,2,Indonesia,female,Reproductive age,600904.1,412163.2,846699.2
3,Total iron deficiency burden (including anemia...,2,dalys,count,15,169,2,Myanmar,female,Reproductive age,202569.1,136951.1,280895.4
4,Total iron deficiency burden (including anemia...,2,dalys,count,20,169,2,Viet Nam,female,Reproductive age,99939.28,65940.91,147102.7


In [74]:
# Sanity check: only one sex label should remain after grouping.
tot_iron['sex'].unique()

array(['female'], dtype=object)

In [75]:
# Number of rows in the total-burden table.
tot_iron.shape[0]

52

In [78]:
# Expected row count: one 'count' row and one 'rate' row per location.
2 * len(tot_iron['location_id'].unique())

52