In [1]:
from vivarium import Artifact
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from db_queries import get_ids, get_outputs
import scipy.stats

!whoami
!date

alibow
Tue Jul  7 13:22:27 PDT 2020


In [41]:
# Simulation outputs to analyse. `locations` and `output_dirs` are parallel lists:
# entry i of `locations` names the country whose count data lives in entry i of
# `output_dirs`.
locations = ['Ethiopia', 'India', 'Nigeria']

output_dirs = [
    '/share/costeffectiveness/results/vivarium_conic_lsff/ethiopia/2020_06_28_12_40_56/count_data/',
    '/share/costeffectiveness/results/vivarium_conic_lsff/india/2020_06_26_20_35_00/count_data/',
    '/share/costeffectiveness/results/vivarium_conic_lsff/nigeria/2020_06_26_20_28_27/count_data/',
]

In [42]:
# Load state person time for every location and total it per
# location / scenario / input draw / cause state.

# Read each location's file, tag it with its location name, and concatenate once.
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing a
# frame inside a loop re-copies all previously loaded rows on every iteration.)
state_pt = pd.concat(
    [
        pd.read_hdf(output_dir + 'state_person_time.hdf').assign(location=location)
        for output_dir, location in zip(output_dirs, locations)
    ]
)

# Sum only the 'value' column, selected explicitly, instead of relying on pandas
# silently dropping non-numeric columns during the aggregation (that behaviour
# was removed in pandas 2.0).
state_pt = state_pt.groupby(['location','scenario','input_draw','cause'], as_index=False)['value'].sum()
state_pt.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,location,scenario,input_draw,cause,value
0,Ethiopia,baseline,21,diarrheal_diseases,50503.63
1,Ethiopia,baseline,21,lower_respiratory_infections,3994.637
2,Ethiopia,baseline,21,measles,613.7796
3,Ethiopia,baseline,21,neural_tube_defects,1496.805
4,Ethiopia,baseline,21,susceptible_to_diarrheal_diseases,1610312.0


In [43]:
# distinct (sorted) cause states present in the state person time dataframe
np.unique(state_pt['cause'].values)

array(['diarrheal_diseases', 'lower_respiratory_infections', 'measles',
       'neural_tube_defects', 'susceptible_to_diarrheal_diseases',
       'susceptible_to_lower_respiratory_infections',
       'susceptible_to_measles', 'susceptible_to_neural_tube_defects',
       'susceptible_to_vitamin_a_deficiency', 'vitamin_a_deficiency'],
      dtype=object)

In [44]:
# Cause model groups: each name below labels one group of cause states
# (the cause state itself plus its 'susceptible_to_' counterpart).
causes = [
    'diarrheal_diseases',
    'lower_respiratory_infections',
    'measles',
    'neural_tube_defects',
    'vitamin_a_deficiency',
]

In [45]:
# Work on a copy of the state person time and label every row with the cause
# group it belongs to, regardless of infected/susceptible status.
cause_group_pt = state_pt.copy()
cause_group_pt['cause_group'] = 0  # placeholder for rows that match no cause group
for cause_name in causes:
    # a state belongs to a group when its name contains the group's cause name
    in_group = cause_group_pt['cause'].str.contains(cause_name)
    cause_group_pt['cause_group'] = np.where(in_group, cause_name, cause_group_pt['cause_group'])
cause_group_pt.head()

Unnamed: 0,location,scenario,input_draw,cause,value,cause_group
0,Ethiopia,baseline,21,diarrheal_diseases,50503.63,diarrheal_diseases
1,Ethiopia,baseline,21,lower_respiratory_infections,3994.637,lower_respiratory_infections
2,Ethiopia,baseline,21,measles,613.7796,measles
3,Ethiopia,baseline,21,neural_tube_defects,1496.805,neural_tube_defects
4,Ethiopia,baseline,21,susceptible_to_diarrheal_diseases,1610312.0,diarrheal_diseases


In [46]:
# sum person time across each cause group
# 'value' is selected explicitly so the result does not depend on pandas dropping
# the non-numeric 'cause' column for us: that implicit dropping was removed in
# pandas 2.0, where the string column would be aggregated too and end up in the
# frame that is later used as the prevalence denominator.
cause_group_pt = cause_group_pt.groupby(['location','scenario','input_draw','cause_group'], as_index=False)['value'].sum()
cause_group_pt.head(10)

# person time between each cause group is the same except for measles, 
    # which is an SIR model and expected to be slightly off because of this

Unnamed: 0,location,scenario,input_draw,cause_group,value
0,Ethiopia,baseline,21,diarrheal_diseases,1660816.0
1,Ethiopia,baseline,21,lower_respiratory_infections,1660816.0
2,Ethiopia,baseline,21,measles,1627085.0
3,Ethiopia,baseline,21,neural_tube_defects,1660816.0
4,Ethiopia,baseline,21,vitamin_a_deficiency,1660816.0
5,Ethiopia,baseline,29,diarrheal_diseases,1660506.0
6,Ethiopia,baseline,29,lower_respiratory_infections,1660506.0
7,Ethiopia,baseline,29,measles,1607126.0
8,Ethiopia,baseline,29,neural_tube_defects,1660506.0
9,Ethiopia,baseline,29,vitamin_a_deficiency,1660506.0


In [47]:
# State prevalence = state person time / overall person time, where the person time
# of the 'diarrheal_diseases' cause group stands in for overall person time.

# numerator: one row per location / scenario / draw / cause state
state_pt = state_pt.set_index(['location','scenario','input_draw','cause'])

# denominator: keep only the diarrheal_diseases group, one row per location / scenario / draw
cause_group_pt = cause_group_pt[cause_group_pt['cause_group'] == 'diarrheal_diseases']
cause_group_pt = cause_group_pt.drop(columns='cause_group').set_index(['location','scenario','input_draw'])

prev = state_pt / cause_group_pt
prev.head(10)

# infected + susceptible prevalence for each cause adds up to approx 1, as expected :)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value
location,scenario,input_draw,cause,Unnamed: 4_level_1
Ethiopia,baseline,21,diarrheal_diseases,0.030409
Ethiopia,baseline,21,lower_respiratory_infections,0.002405
Ethiopia,baseline,21,measles,0.00037
Ethiopia,baseline,21,neural_tube_defects,0.000901
Ethiopia,baseline,21,susceptible_to_diarrheal_diseases,0.969591
Ethiopia,baseline,21,susceptible_to_lower_respiratory_infections,0.997595
Ethiopia,baseline,21,susceptible_to_measles,0.979321
Ethiopia,baseline,21,susceptible_to_neural_tube_defects,0.999099
Ethiopia,baseline,21,susceptible_to_vitamin_a_deficiency,0.620203
Ethiopia,baseline,21,vitamin_a_deficiency,0.379797


In [48]:
# move the index levels back into ordinary columns, then preview the India rows
prev = prev.reset_index()
prev[prev['location'] == 'India'].head(10)

Unnamed: 0,location,scenario,input_draw,cause,value
1000,India,baseline,21,diarrheal_diseases,0.013723
1001,India,baseline,21,lower_respiratory_infections,0.00266
1002,India,baseline,21,measles,0.000902
1003,India,baseline,21,neural_tube_defects,0.000564
1004,India,baseline,21,susceptible_to_diarrheal_diseases,0.986277
1005,India,baseline,21,susceptible_to_lower_respiratory_infections,0.99734
1006,India,baseline,21,susceptible_to_measles,0.949123
1007,India,baseline,21,susceptible_to_neural_tube_defects,0.999436
1008,India,baseline,21,susceptible_to_vitamin_a_deficiency,0.701726
1009,India,baseline,21,vitamin_a_deficiency,0.298274


In [49]:
# preview the Nigeria rows
prev[prev['location'] == 'Nigeria'].head(10)

Unnamed: 0,location,scenario,input_draw,cause,value
2000,Nigeria,baseline,21,diarrheal_diseases,0.028547
2001,Nigeria,baseline,21,lower_respiratory_infections,0.001839
2002,Nigeria,baseline,21,measles,0.000698
2003,Nigeria,baseline,21,neural_tube_defects,0.001323
2004,Nigeria,baseline,21,susceptible_to_diarrheal_diseases,0.971453
2005,Nigeria,baseline,21,susceptible_to_lower_respiratory_infections,0.998161
2006,Nigeria,baseline,21,susceptible_to_measles,0.960942
2007,Nigeria,baseline,21,susceptible_to_neural_tube_defects,0.998677
2008,Nigeria,baseline,21,susceptible_to_vitamin_a_deficiency,0.759513
2009,Nigeria,baseline,21,vitamin_a_deficiency,0.240487


## Now compare this to prevalence estimates using the person_time.hdf data

In [32]:
# Load state person time for every location and total it per
# location / scenario / input draw / cause state.

# Read each location's file, tag it with its location name, and concatenate once.
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing a
# frame inside a loop re-copies all previously loaded rows on every iteration.)
state_pt = pd.concat(
    [
        pd.read_hdf(output_dir + 'state_person_time.hdf').assign(location=location)
        for output_dir, location in zip(output_dirs, locations)
    ]
)

# Sum only the 'value' column, selected explicitly, instead of relying on pandas
# silently dropping non-numeric columns during the aggregation (that behaviour
# was removed in pandas 2.0).
state_pt = state_pt.groupby(['location','scenario','input_draw','cause'], as_index=False)['value'].sum()
state_pt.head()

Unnamed: 0,location,scenario,input_draw,cause,value
0,Ethiopia,baseline,21,diarrheal_diseases,50503.63
1,Ethiopia,baseline,21,lower_respiratory_infections,3994.637
2,Ethiopia,baseline,21,measles,613.7796
3,Ethiopia,baseline,21,neural_tube_defects,1496.805
4,Ethiopia,baseline,21,susceptible_to_diarrheal_diseases,1610312.0


In [33]:
# Load overall person time for every location and total it per
# location / scenario / input draw.

# Read each location's file, tag it with its location name, and concatenate once.
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing a
# frame inside a loop re-copies all previously loaded rows on every iteration.)
pt = pd.concat(
    [
        pd.read_hdf(output_dir + 'person_time.hdf').assign(location=location)
        for output_dir, location in zip(output_dirs, locations)
    ]
)

# Sum only the 'value' column, selected explicitly, instead of relying on pandas
# silently dropping non-numeric columns during the aggregation (that behaviour
# was removed in pandas 2.0).
pt = pt.groupby(['location','scenario','input_draw'], as_index=False)['value'].sum()
pt.head()

Unnamed: 0,location,scenario,input_draw,value
0,Ethiopia,baseline,21,1660816.0
1,Ethiopia,baseline,29,1660506.0
2,Ethiopia,baseline,55,1658936.0
3,Ethiopia,baseline,78,1661091.0
4,Ethiopia,baseline,155,1663234.0


In [34]:
# State prevalence = state person time / person time from person_time.hdf
# denominator: one row per location / scenario / draw
pt = pt.set_index(['location','scenario','input_draw'])
# numerator: one row per location / scenario / draw / cause state
state_pt = state_pt.set_index(['location','scenario','input_draw','cause'])
prev = state_pt / pt
prev.head(10)

# looks good for Ethiopia (prevalence across infected/susceptible states for a given cause adds up to ~1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value
location,scenario,input_draw,cause,Unnamed: 4_level_1
Ethiopia,baseline,21,diarrheal_diseases,0.030409
Ethiopia,baseline,21,lower_respiratory_infections,0.002405
Ethiopia,baseline,21,measles,0.00037
Ethiopia,baseline,21,neural_tube_defects,0.000901
Ethiopia,baseline,21,susceptible_to_diarrheal_diseases,0.969591
Ethiopia,baseline,21,susceptible_to_lower_respiratory_infections,0.997595
Ethiopia,baseline,21,susceptible_to_measles,0.979321
Ethiopia,baseline,21,susceptible_to_neural_tube_defects,0.999099
Ethiopia,baseline,21,susceptible_to_vitamin_a_deficiency,0.620203
Ethiopia,baseline,21,vitamin_a_deficiency,0.379797


In [39]:
# move the index levels back into ordinary columns, then preview the India rows
prev = prev.reset_index()
prev[prev['location'] == 'India'].head(10)

# does NOT look good for India (infected + susceptible prevalence does NOT add up to 1)

Unnamed: 0,location,scenario,input_draw,cause,value
1000,India,baseline,21,diarrheal_diseases,6.6e-05
1001,India,baseline,21,lower_respiratory_infections,1.3e-05
1002,India,baseline,21,measles,4e-06
1003,India,baseline,21,neural_tube_defects,3e-06
1004,India,baseline,21,susceptible_to_diarrheal_diseases,0.00473
1005,India,baseline,21,susceptible_to_lower_respiratory_infections,0.004783
1006,India,baseline,21,susceptible_to_measles,0.004552
1007,India,baseline,21,susceptible_to_neural_tube_defects,0.004793
1008,India,baseline,21,susceptible_to_vitamin_a_deficiency,0.003366
1009,India,baseline,21,vitamin_a_deficiency,0.001431


In [40]:
# preview the Nigeria rows
prev[prev['location'] == 'Nigeria'].head(10)

# also does NOT look good for Nigeria

Unnamed: 0,location,scenario,input_draw,cause,value
2000,Nigeria,baseline,21,diarrheal_diseases,0.000151
2001,Nigeria,baseline,21,lower_respiratory_infections,1e-05
2002,Nigeria,baseline,21,measles,4e-06
2003,Nigeria,baseline,21,neural_tube_defects,7e-06
2004,Nigeria,baseline,21,susceptible_to_diarrheal_diseases,0.005142
2005,Nigeria,baseline,21,susceptible_to_lower_respiratory_infections,0.005284
2006,Nigeria,baseline,21,susceptible_to_measles,0.005087
2007,Nigeria,baseline,21,susceptible_to_neural_tube_defects,0.005286
2008,Nigeria,baseline,21,susceptible_to_vitamin_a_deficiency,0.00402
2009,Nigeria,baseline,21,vitamin_a_deficiency,0.001273


# Overall conclusion:

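## Use the `state_person_time.hdf` person time summed over a single cause group (the `diarrheal_diseases` and `susceptible_to_diarrheal_diseases` states) as the total person time for a given stratum, rather than the value from `person_time.hdf`: with the `person_time.hdf` denominator, the infected and susceptible prevalences for India and Nigeria do not add up to ~1.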