In [1]:
import numpy as np, matplotlib.pyplot as plt, pandas as pd
# Show at most 8 rows whenever a DataFrame is rendered in this notebook.
pd.set_option('display.max_rows', 8)
# Record when the notebook was run (shell command; its output is shown below the cell).
!date

# autoreload mode 2: re-import edited modules automatically before running each cell.
%load_ext autoreload
%autoreload 2

Mon Mar 22 22:23:29 PDT 2021


In [2]:
from db_queries import get_population, get_location_metadata, get_ids, get_model_results, get_covariate_estimates

In [3]:
## load legal combos
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'


def _load_pickle(filename):
    """Return the object stored in the pickle file `filename` under `data_prep_dir`."""
    with open(data_prep_dir + filename, 'rb') as handle:
        return pickle.load(handle)


# NOTE(review): pickle.load can execute arbitrary code; only point this at trusted project files.
# country_vehicle_map is used below as {location name: iterable of vehicles};
# the other two are presumably the analogous vehicle->nutrient and vehicle->country maps (TODO confirm).
vehicle_nutrient_map = _load_pickle('lsff_vehicle_nutrient_pairs.pickle')
country_vehicle_map = _load_pickle('lsff_country_vehicle_pairs.pickle')
vehicle_country_map = _load_pickle('lsff_vehicle_country_pairs.pickle')

In [4]:
# One row per (location, vehicle) pair listed in country_vehicle_map.
location_vehicle_pairs = [
    (loc, vehicle)
    for loc, vehicles in country_vehicle_map.items()
    for vehicle in vehicles
]
frame = pd.DataFrame(location_vehicle_pairs, columns=['location_name', 'vehicle'])

In [5]:
data_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/population_coverage_data_and_estimates_3_22_2021.csv'
data = pd.read_csv(data_path)

# Use the spelling "Viet Nam" -- presumably so the name matches the GBD location
# metadata that df is merged with further down (TODO confirm).
is_vietnam = data['location_name'] == "Vietnam"
data.loc[is_vietnam, 'location_name'] = "Viet Nam"

In [6]:
# Preview the first rows of the coverage data that was just loaded.
data.head()

Unnamed: 0,location_name,vehicle,nutrient,value_description,value_mean,value_025_percentile,value_975_percentile,is_estimate,sub_population,source_year,standard
0,Angola,maize flour,folic acid,percent of vehicle that is fortified,0.0,,,0.0,total population,2018,
1,Angola,maize flour,iron,percent of vehicle that is fortified,0.0,,,0.0,total population,2018,
2,Angola,maize flour,iron,percent of vehicle that is fortified,0.0,,,0.0,total population,2021,
3,Angola,maize flour,vitamin b1,percent of vehicle that is fortified,0.0,,,0.0,total population,2018,
4,Angola,maize flour,vitamin b12,percent of vehicle that is fortified,0.0,,,0.0,total population,2018,


In [7]:
# Pivot three value_description categories of `data` into side-by-side columns,
# keyed on (location_name, vehicle). Only the "fortified vehicle" category also
# carries the nutrient column.
merge_cols = ['location_name','vehicle']
df = pd.DataFrame()
for col in ['percent of population eating industrially produced vehicle',
            'percent of population eating vehicle',
            'percent of population eating fortified vehicle']:
    new_name = {'value_mean': col.replace(" ", "_")}
    is_fortified = (col == 'percent of population eating fortified vehicle')
    keep_cols = merge_cols + (['nutrient', 'value_mean'] if is_fortified else ['value_mean'])

    # rows for this description, without missing values, value column renamed after the description
    df_i = data.loc[data.value_description == col, keep_cols].dropna().rename(columns=new_name)

    # NOTE(review): (location_name, vehicle) is not unique within a description, so the
    # outer merge pairs every matching row with every other one (see the repeated
    # Bangladesh/oil rows in the preview further down) -- confirm this is intended.
    df = df_i if len(df) == 0 else df.merge(df_i, on=merge_cols, how='outer')

In [8]:
# Drop rows that are exact duplicates across all columns, keeping the first occurrence.
# (A previous version also required percent_of_population_eating_industrially_produced_vehicle
# to be non-missing; that filter is currently disabled.)
df = df.drop_duplicates(keep='first')

In [9]:
# Outer join so every location-vehicle pair in `frame` is present,
# even when there is no coverage data for it.
df = pd.merge(df, frame, how='outer', on=['location_name', 'vehicle'])

In [10]:
# Location metadata for location set 1, GBD round 6, decomp step 4.
locations = get_location_metadata(
    location_set_id=1,
    gbd_round_id=6,
    decomp_step="step4",
)

In [11]:
# Keep only the locations whose name appears in df, and only the name/id columns.
names_in_df = df['location_name'].unique()
locations = locations.loc[
    locations['location_name'].isin(names_in_df),
    ['location_name', 'location_id'],
]
# Plain list of location ids, reused by the database queries below.
locs = locations['location_id'].tolist()

In [12]:
# Display the name -> id lookup for the locations present in df.
locations

Unnamed: 0,location_name,location_id
446,Egypt,141
458,Sudan,522
466,Bangladesh,161
468,India,163
...,...,...
627,Côte d'Ivoire,205
629,Ghana,207
635,Niger,213
636,Nigeria,214


In [13]:
# Attach location_id by name; names with no match in `locations` get a missing id.
location_lookup = locations[['location_id', 'location_name']]
df = pd.merge(df, location_lookup, how='left', on='location_name')

In [14]:
# Lookup table of modelable entities; used below to check the names of the ME ids pulled later.
me_ids = get_ids("modelable_entity")

In [15]:
# Confirm what the two modelable entities used below (24374, 24381) represent.
me_ids.loc[me_ids['modelable_entity_id'].isin([24374, 24381])]

Unnamed: 0,modelable_entity_id,modelable_entity_name,modelable_entity_description
9962,24374,Diet low in whole grains (g/day) 25+,Post ST-GPR age 25+ model used in estimation o...
9969,24381,Diet high in sodium (g/day) 25+,Post ST-GPR age 25+ model used in estimation o...


In [16]:
# Lookup table of covariates; used below to find the ids for SDI and iodized-salt use.
covariates = get_ids("covariate")

In [17]:
# Find the covariate row whose short name is "sdi".
covariates.loc[covariates['covariate_name_short'] == "sdi"]

Unnamed: 0,covariate_id,covariate_name,covariate_name_short,covariate_description
336,881,Socio-demographic Index,sdi,A measure of development estimated via princip...


In [18]:
# Search covariate short names for anything mentioning "iodize".
[short_name for short_name in covariates['covariate_name_short'] if "iodize" in short_name]

['hh_iodized_salt_pc']

In [19]:
# Show the full covariate row for household iodized-salt use.
covariates.loc[covariates['covariate_name_short'] == "hh_iodized_salt_pc"]

Unnamed: 0,covariate_id,covariate_name,covariate_name_short,covariate_description
21,46,Proportion of households using iodized salt (a...,hh_iodized_salt_pc,"Proportion of households using iodized salt, w..."


In [20]:
# Covariate 881 is the Socio-demographic Index (see the lookup above).
sdi = get_covariate_estimates(covariate_id=881, gbd_round_id=6, decomp_step='step4')

In [21]:
# Keep the 2019 estimate for our locations, and only the id and value columns.
sdi_rows = sdi['location_id'].isin(locs) & (sdi['year_id'] == 2019)
sdi = sdi.loc[sdi_rows, ['location_id', 'mean_value']]

In [22]:
# Name the value column after the covariate so it is recognisable once merged into df.
sdi = sdi.rename({'mean_value': 'sdi'}, axis='columns')

In [23]:
# Covariate 46 is the proportion of households using iodized salt (see the lookup above).
hh_salt = get_covariate_estimates(covariate_id=46, gbd_round_id=6, decomp_step='step4')

In [24]:
# Keep the 2019 estimate for our locations, and only the id and value columns.
hh_salt_rows = hh_salt['location_id'].isin(locs) & (hh_salt['year_id'] == 2019)
hh_salt = hh_salt.loc[hh_salt_rows, ['location_id', 'mean_value']]

In [25]:
# Name the value column after the covariate so it is recognisable once merged into df.
hh_salt = hh_salt.rename({'mean_value': 'hh_salt'}, axis='columns')

In [26]:
def sex_weight(df, index_cols = ['location_id'], pop = None):
    """
    Collapse sex-specific estimates into one population-weighted both-sex mean.

    Assumes one year (2019), all age (22)
    sex weight "mean" var

    Parameters
    ----------
    df : DataFrame
        Must contain 'location_id', 'sex_id', 'mean' and every column in
        `index_cols`, with exactly one row per sex (1 and 2) for each
        combination of `index_cols`.
    index_cols : list of str
        Columns that, together with sex_id, uniquely identify a row of `df`.
        The weighted mean is computed within each combination of these columns.
    pop : DataFrame, optional
        Population counts with columns 'location_id', 'sex_id', 'population'.
        When omitted, they are pulled with get_population for the locations
        present in `df` (2019, all ages, GBD round 6 step4).

    Returns
    -------
    DataFrame with the `index_cols` plus 'mean', one row per combination.

    Raises
    ------
    AssertionError
        If `index_cols` + sex is not a unique key of `df`, or if a row of `df`
        has no matching population count.
    """
    index_cols = list(index_cols)

    N = 1
    for col in index_cols:
        N = N * df[col].nunique()

    assert(len(df)==2 * N), "provided index_cols + sex aren't a unique key for this df"

    if pop is None:
        # query only the locations actually present in df rather than relying on a notebook global
        pop = get_population(age_group_id=22,
                             sex_id = [1,2],
                             location_id=df.location_id.unique().tolist(),
                             year_id=2019,
                             gbd_round_id=6,
                             decomp_step='step4')

    # add population counts (merge returns a new frame, so the caller's df is untouched)
    df = df.merge(pop[['location_id','sex_id','population']],
                  on = ['location_id','sex_id'],
                  how = 'left')

    # a missing population would silently drop that sex from the weighted sum
    assert df.population.notna().all(), "missing population for some location/sex rows"

    # weight mean: each sex's share of the group's total population
    group_total = df.groupby(index_cols)['population'].transform('sum')
    df['sex_weight'] = df.population / group_total
    df['mean'] = df['mean'] * df.sex_weight
    return df.groupby(index_cols)[['mean']].sum().reset_index()
    

In [27]:
# ME 24374 = "Diet low in whole grains (g/day) 25+" (see the lookup above).
# Pull the sex-specific 2019 all-age results, collapse them to a both-sex
# population-weighted mean, and name the value column after the covariate.
whole_grains = sex_weight(
    get_model_results('epi', 24374,
                      gbd_round_id=6,
                      sex_id=[1, 2],
                      age_group_id=22,
                      location_id=locs,
                      year_id=2019,
                      decomp_step='step4')
).rename(columns={'mean': 'whole_grains'})

In [28]:
# ME 24381 = "Diet high in sodium (g/day) 25+" (see the lookup above).
# Pull the sex-specific 2019 all-age results, collapse them to a both-sex
# population-weighted mean, and name the value column after the covariate.
sodium = sex_weight(
    get_model_results('epi', 24381,
                      gbd_round_id=6,
                      sex_id=[1, 2],
                      age_group_id=22,
                      location_id=locs,
                      year_id=2019,
                      decomp_step='step4')
).rename(columns={'mean': 'sodium'})

In [29]:
# Add the sodium covariate; rows without a matching location_id keep a missing value.
df = pd.merge(df, sodium, how='left', on='location_id')

In [30]:
# Add the whole-grains covariate; rows without a matching location_id keep a missing value.
df = pd.merge(df, whole_grains, how='left', on='location_id')

In [31]:
# Add SDI and household iodized-salt use, both keyed on location_id.
df = (
    df
    .merge(sdi, how='left', on='location_id')
    .merge(hh_salt, how='left', on='location_id')
)

In [32]:
# Inspect the merged frame (rendering is capped by the display.max_rows option set at the top).
df

Unnamed: 0,location_name,vehicle,percent_of_population_eating_industrially_produced_vehicle,percent_of_population_eating_vehicle,nutrient,percent_of_population_eating_fortified_vehicle,location_id,sodium,whole_grains,sdi,hh_salt
0,Bangladesh,oil,88.4,95.0,vitamin a,42.849572,161.0,1.869800,30.645066,0.475232,0.809817
1,Bangladesh,oil,88.4,95.0,vitamin d,0.000000,161.0,1.869800,30.645066,0.475232,0.809817
2,Bangladesh,oil,88.4,89.0,vitamin a,42.849572,161.0,1.869800,30.645066,0.475232,0.809817
3,Bangladesh,oil,88.4,89.0,vitamin d,0.000000,161.0,1.869800,30.645066,0.475232,0.809817
...,...,...,...,...,...,...,...,...,...,...,...
1333,China,wheat flour,,,,,6.0,5.079122,16.393549,0.691999,0.974691
1334,China,oil,,,,,6.0,5.079122,16.393549,0.691999,0.974691
1335,,wheat flour,,,,,,,,,
1336,,oil,,,,,,,,,


In [33]:
fao_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/FAOSTAT_food_supply_aggregated_oil_2018.csv'
fao = pd.read_csv(fao_path)
# `data` was normalised to the spelling "Viet Nam" when it was loaded, and df is merged
# with fao on location_name. Normalise fao to the same spelling; renaming it to
# "Vietnam" (as this cell used to do) leaves the FAO columns missing for that country.
fao.loc[(fao.location_name=="Vietnam"),'location_name'] = "Viet Nam"

In [34]:
# Add the FAO food-supply columns by location name; unmatched names keep missing values.
df = pd.merge(df, fao, how='left', on='location_name')

In [35]:
# Pull the location metadata again, this time to read each location's parent_id.
parents = get_location_metadata(
    location_set_id=1,
    gbd_round_id=6,
    decomp_step="step4",
)

In [36]:
# Keep only locations that occur in df, and only the id -> parent mapping.
is_in_df = parents['location_id'].isin(df['location_id'])
parents = parents.loc[is_in_df, ['location_id', 'parent_id']]

In [37]:
# Display the location_id -> parent_id mapping for the locations in df.
parents

Unnamed: 0,location_id,parent_id
446,141,138
458,522,138
466,161,159
468,163,159
...,...,...
627,205,199
629,207,199
635,213,199
636,214,199


In [38]:
# Inner join (the pandas default, spelled out here): rows of df whose location_id has
# no match in `parents` are dropped. NOTE(review): this presumably removes the rows
# that never received a location_id -- confirm that is intended, since the earlier
# merges were all left/outer joins.
df = pd.merge(df, parents, how='inner', on='location_id')

In [39]:
# Preview the final frame, now including the FAO columns and parent_id.
df.head()

Unnamed: 0,location_name,vehicle,percent_of_population_eating_industrially_produced_vehicle,percent_of_population_eating_vehicle,nutrient,percent_of_population_eating_fortified_vehicle,location_id,sodium,whole_grains,sdi,hh_salt,fao_maize_and_products,fao_rice_and_products,fao_wheat_and_products,fao_oil_all,parent_id
0,Bangladesh,oil,88.4,95.0,vitamin a,42.849572,161.0,1.8698,30.645066,0.475232,0.809817,7.0,1728.0,164.0,178.0,159
1,Bangladesh,oil,88.4,95.0,vitamin d,0.0,161.0,1.8698,30.645066,0.475232,0.809817,7.0,1728.0,164.0,178.0,159
2,Bangladesh,oil,88.4,89.0,vitamin a,42.849572,161.0,1.8698,30.645066,0.475232,0.809817,7.0,1728.0,164.0,178.0,159
3,Bangladesh,oil,88.4,89.0,vitamin d,0.0,161.0,1.8698,30.645066,0.475232,0.809817,7.0,1728.0,164.0,178.0,159
4,Bangladesh,oil,88.4,75.0,vitamin a,42.849572,161.0,1.8698,30.645066,0.475232,0.809817,7.0,1728.0,164.0,178.0,159


In [40]:
# Re-read the coverage CSV from data_path as-is (without the location-name fix applied to `data`).
compare = pd.read_csv(data_path)


In [42]:
# Show only the identifying columns of the raw coverage data.
compare.loc[:, ['location_name', 'vehicle', 'nutrient', 'value_description']]

Unnamed: 0,location_name,vehicle,nutrient,value_description
0,Angola,maize flour,folic acid,percent of vehicle that is fortified
1,Angola,maize flour,iron,percent of vehicle that is fortified
2,Angola,maize flour,iron,percent of vehicle that is fortified
3,Angola,maize flour,vitamin b1,percent of vehicle that is fortified
...,...,...,...,...
1225,India,salt,folic acid,percent of population eating fortified vehicle
1226,Myanmar,wheat flour,vitamin a,percent of population eating fortified vehicle
1227,Nigeria,salt,folic acid,percent of population eating fortified vehicle
1228,Pakistan,wheat flour,vitamin a,percent of population eating fortified vehicle


In [43]:
# Write the merged coverage + covariate table to disk, without the row index.
out_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/lsff_estimated_data_plus_covariates_with_nutrient_3_22_2021.csv'
df.to_csv(out_path, index=False)