In [1]:
from vivarium import Artifact
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from db_queries import get_ids, get_outputs
import scipy.stats

!whoami
!date

alibow
Mon Jun 29 11:08:12 PDT 2020


In [2]:
# Count-data output directory of each simulation run. The entries are paired
# by position with `locations` below, so keep both lists in the same order.
output_dirs = [
    '/share/costeffectiveness/results/vivarium_conic_lsff/india/2020_06_26_20_35_00/count_data/',
    '/share/costeffectiveness/results/vivarium_conic_lsff/nigeria/2020_06_26_20_28_27/count_data/',
]

# Label written into the `location` column for rows read from the matching
# entry of `output_dirs`.
locations = ['India', 'Nigeria']

In [22]:
# load state person time

# Read each location's state_person_time.hdf, tag its rows with the location
# label, and stack everything with a single concat. DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated frame on every pass.
state_pt = pd.concat(
    [
        pd.read_hdf(output_dir + 'state_person_time.hdf').assign(location=location)
        for output_dir, location in zip(output_dirs, locations)
    ]
)

# Collapse to one row per location/scenario/draw/state. `value` is selected
# explicitly so that any remaining non-key columns are never aggregated
# (recent pandas no longer drops non-numeric columns silently on .sum()).
state_pt = state_pt.groupby(
    ['location', 'scenario', 'input_draw', 'cause'], as_index=False
)['value'].sum()
state_pt.head()

Unnamed: 0,location,scenario,input_draw,cause,value
0,India,baseline,21,diarrheal_diseases,20933.23
1,India,baseline,21,lower_respiratory_infections,4057.634
2,India,baseline,21,measles,1375.91
3,India,baseline,21,neural_tube_defects,860.2136
4,India,baseline,21,susceptible_to_diarrheal_diseases,1504500.0


In [23]:
# Distinct state names found in the `cause` column of the state person time
# dataframe (np.unique returns them sorted).
np.unique(state_pt.loc[:, 'cause'])

array(['diarrheal_diseases', 'lower_respiratory_infections', 'measles',
       'neural_tube_defects', 'susceptible_to_diarrheal_diseases',
       'susceptible_to_lower_respiratory_infections',
       'susceptible_to_measles', 'susceptible_to_neural_tube_defects',
       'susceptible_to_vitamin_a_deficiency', 'vitamin_a_deficiency'],
      dtype=object)

In [24]:
# Name all cause model groups. Each entry is matched as a substring of the
# state names, so it covers both the with-condition state and its
# `susceptible_to_...` counterpart.
causes = [
    'diarrheal_diseases',
    'lower_respiratory_infections',
    'measles',
    'neural_tube_defects',
    'vitamin_a_deficiency',
]

In [25]:
# create new dataframe that groups by cause (regardless of infected status)
cause_group_pt = state_pt.copy()

# Start with "no group" rather than the integer 0: mixing an int placeholder
# with string labels gets coerced to the string '0', which would silently form
# a bogus cause group for any state that matches nothing.
cause_group_pt['cause_group'] = None
for cause in causes:
    # Literal substring match (not a regex): 'susceptible_to_<cause>' and
    # '<cause>' both land in the same group. If a state matched several
    # causes, the last one in `causes` wins, as before.
    in_group = cause_group_pt['cause'].str.contains(cause, regex=False, na=False)
    cause_group_pt.loc[in_group, 'cause_group'] = cause

# Fail loudly if a state was left without a group instead of carrying it
# forward into the person-time sums.
unassigned_states = cause_group_pt.loc[cause_group_pt['cause_group'].isna(), 'cause'].unique()
assert len(unassigned_states) == 0, (
    'states not assigned to any cause group: {}'.format(list(unassigned_states))
)
cause_group_pt.head()

Unnamed: 0,location,scenario,input_draw,cause,value,cause_group
0,India,baseline,21,diarrheal_diseases,20933.23,diarrheal_diseases
1,India,baseline,21,lower_respiratory_infections,4057.634,lower_respiratory_infections
2,India,baseline,21,measles,1375.91,measles
3,India,baseline,21,neural_tube_defects,860.2136,neural_tube_defects
4,India,baseline,21,susceptible_to_diarrheal_diseases,1504500.0,diarrheal_diseases


In [26]:
# sum person time across each cause group
#
# person time between each cause group is the same except for measles,
# which is an SIR model and expected to be slightly off because of this
#
# Only `value` is aggregated: the frame still carries the per-state string
# column `cause`, which recent pandas would concatenate under .sum() instead
# of dropping.
cause_group_pt = cause_group_pt.groupby(
    ['location', 'scenario', 'input_draw', 'cause_group'], as_index=False
)['value'].sum()
cause_group_pt.head(10)

Unnamed: 0,location,scenario,input_draw,cause_group,value
0,India,baseline,21,diarrheal_diseases,1525433.0
1,India,baseline,21,lower_respiratory_infections,1525433.0
2,India,baseline,21,measles,1449199.0
3,India,baseline,21,neural_tube_defects,1525433.0
4,India,baseline,21,vitamin_a_deficiency,1525433.0
5,India,baseline,29,diarrheal_diseases,1523152.0
6,India,baseline,29,lower_respiratory_infections,1523152.0
7,India,baseline,29,measles,1424728.0
8,India,baseline,29,neural_tube_defects,1523152.0
9,India,baseline,29,vitamin_a_deficiency,1523152.0


In [27]:
# calculate state prevalence using cause_group == 'diarrheal_diseases' as a measure for overall persontime

# Numerator: person time of every individual state, keyed by stratum + state.
state_pt = state_pt.set_index(['location', 'scenario', 'input_draw', 'cause'])

# Denominator: the diarrheal_diseases cause-group total, keyed by stratum only.
is_reference_group = cause_group_pt['cause_group'] == 'diarrheal_diseases'
cause_group_pt = (
    cause_group_pt[is_reference_group]
    .drop(columns='cause_group')
    .set_index(['location', 'scenario', 'input_draw'])
)

# The division aligns the two frames on their shared index levels.
prev = state_pt / cause_group_pt
prev.head(10)

# prevalence of infected and susceptible person time adds to approx 1 as expected :)
# (the measles pair in the output below sums to about 0.95 rather than 1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value
location,scenario,input_draw,cause,Unnamed: 4_level_1
India,baseline,21,diarrheal_diseases,0.013723
India,baseline,21,lower_respiratory_infections,0.00266
India,baseline,21,measles,0.000902
India,baseline,21,neural_tube_defects,0.000564
India,baseline,21,susceptible_to_diarrheal_diseases,0.986277
India,baseline,21,susceptible_to_lower_respiratory_infections,0.99734
India,baseline,21,susceptible_to_measles,0.949123
India,baseline,21,susceptible_to_neural_tube_defects,0.999436
India,baseline,21,susceptible_to_vitamin_a_deficiency,0.701726
India,baseline,21,vitamin_a_deficiency,0.298274


## Now compare this to prevalence estimates using the person_time.hdf data

In [28]:
# load state person time
# (read again from disk because `state_pt` was re-indexed in the previous section)

# One concat over the per-location frames. DataFrame.append was removed in
# pandas 2.0 and re-copied the accumulated frame on every iteration.
state_pt = pd.concat(
    [
        pd.read_hdf(output_dir + 'state_person_time.hdf').assign(location=location)
        for output_dir, location in zip(output_dirs, locations)
    ]
)

# Collapse to one row per location/scenario/draw/state, summing only `value`
# so that no other non-key column is aggregated by accident.
state_pt = state_pt.groupby(
    ['location', 'scenario', 'input_draw', 'cause'], as_index=False
)['value'].sum()
state_pt.head()

Unnamed: 0,location,scenario,input_draw,cause,value
0,India,baseline,21,diarrheal_diseases,20933.23
1,India,baseline,21,lower_respiratory_infections,4057.634
2,India,baseline,21,measles,1375.91
3,India,baseline,21,neural_tube_defects,860.2136
4,India,baseline,21,susceptible_to_diarrheal_diseases,1504500.0


In [30]:
# load person time

# Read each location's person_time.hdf, tag its rows with the location label,
# and stack them with a single concat. DataFrame.append was removed in
# pandas 2.0 and re-copied the accumulated frame on every iteration.
pt = pd.concat(
    [
        pd.read_hdf(output_dir + 'person_time.hdf').assign(location=location)
        for output_dir, location in zip(output_dirs, locations)
    ]
)

# Total person time per location/scenario/draw. `value` is selected explicitly
# so that any other non-key column is never aggregated (recent pandas no
# longer drops non-numeric columns silently on .sum()).
pt = pt.groupby(['location', 'scenario', 'input_draw'], as_index=False)['value'].sum()
pt.head()

Unnamed: 0,location,scenario,input_draw,value
0,India,baseline,21,318053100.0
1,India,baseline,29,318456700.0
2,India,baseline,55,318348100.0
3,India,baseline,78,317714600.0
4,India,baseline,155,317537100.0


In [31]:
# Key both frames by stratum so the division lines rows up by
# location / scenario / draw; `state_pt` additionally keeps the state name.
stratum_cols = ['location', 'scenario', 'input_draw']
state_pt = state_pt.set_index(stratum_cols + ['cause'])
pt = pt.set_index(stratum_cols)

# State person time as a share of the person_time.hdf total for the stratum.
prev = state_pt / pt
prev.head(10)

# prevalence across infected/susceptible states for a given cause does NOT add up to 1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value
location,scenario,input_draw,cause,Unnamed: 4_level_1
India,baseline,21,diarrheal_diseases,6.6e-05
India,baseline,21,lower_respiratory_infections,1.3e-05
India,baseline,21,measles,4e-06
India,baseline,21,neural_tube_defects,3e-06
India,baseline,21,susceptible_to_diarrheal_diseases,0.00473
India,baseline,21,susceptible_to_lower_respiratory_infections,0.004783
India,baseline,21,susceptible_to_measles,0.004552
India,baseline,21,susceptible_to_neural_tube_defects,0.004793
India,baseline,21,susceptible_to_vitamin_a_deficiency,0.003366
India,baseline,21,vitamin_a_deficiency,0.001431


# Overall conclusion:

## Use the state_person_time.hdf person time summed over the states of one cause group (e.g. diarrheal_diseases plus susceptible_to_diarrheal_diseases) as the total person time for a given stratum, rather than the value from person_time.hdf