In [1]:
from db_queries import get_population, get_ids
from db_queries import get_location_metadata as get_locs

In [2]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt

# Prep g/day for Tier 3 locs

## Kenya, Burkina Faso, Myanmar, Vietnam, Nepal

In [3]:
## load targets
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'

with open(data_prep_dir + 'lsff_vehicle_nutrient_pairs.pickle', 'rb') as handle:
    vehicle_nutrient_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_country_vehicle_pairs.pickle', 'rb') as handle:
    country_vehicle_map = pickle.load(handle)

In [4]:
gday_path =  '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/gday_extraction_sheet_03_22_2021.csv'
gday = pd.read_csv(gday_path)

assert(len(gday[gday.location_name.isna()])==0), "Some rows missing location name"

In [5]:
output = pd.DataFrame()

In [6]:
# location_names = ['Pakistan','Bangladesh','United Republic of Tanzania','Uganda','South Africa']

location_names = ['Kenya', 'Burkina Faso', 'Myanmar', 'Vietnam', 'Nepal']
vehicles = ['wheat flour','maize flour','oil']

In [7]:
# these are the vehicles per country we need
target = pd.DataFrame([(loc,v) for loc in location_names for v in country_vehicle_map[loc]],
            columns=['location_name','vehicle']).sort_values(['location_name','vehicle']).set_index(['location_name','vehicle'])

target

location_name,vehicle
Burkina Faso,bouillon
Burkina Faso,maize flour
Burkina Faso,oil
Burkina Faso,wheat flour
Kenya,maize flour
Kenya,oil
Kenya,wheat flour
Myanmar,oil
Myanmar,wheat flour
Nepal,oil


In [8]:
gday.location_id = gday.location_id.fillna(-1).astype(int)

In [9]:
gday.location_name.unique()

array(['Afghanistan', 'Angola', 'Bangladesh', 'Burkina Faso',
       "Côte d'Ivoire", 'Cameroon', 'Chad', 'China ',
       'Democratic Republic of the Congo', 'Egypt', 'Ethiopia', 'Ghana',
       'India', 'Indonesia', 'Kenya', 'Madagascar', 'Mozambique',
       'Myanmar', 'Nepal', 'Niger', 'Nigeria', 'Pakistan', 'Philippines',
       'South Africa', 'Sudan', 'Uganda', 'United Republic of Tanzania',
       'Viet Nam', 'Yemen', 'Zambia'], dtype=object)

In [10]:
# estimate CIs, crude

# clean value_mean
gday.loc[gday.value_mean=='na','value_mean'] = np.nan
gday.value_mean = gday.value_mean.astype(float)

# clean 2.5th %ile
gday.loc[gday.value_025_percentile=='na','value_025_percentile'] = np.nan
gday.value_025_percentile = gday.value_025_percentile.astype(float)

# clean 97.5th %ile
gday.loc[gday.value_975_percentile=='na','value_975_percentile'] = np.nan
gday.value_975_percentile = gday.value_975_percentile.astype(float)

# calc scale_over_mean
gday['scale_over_mean'] = (gday.value_975_percentile - gday.value_025_percentile) / gday.value_mean

#foreach vehicle, average scale_over_mean
r = gday[['vehicle','scale_over_mean']].groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'})

In [11]:
# these are the vehicles for which we have a scale_over_mean estimate
r

Unnamed: 0_level_0,r
vehicle,Unnamed: 1_level_1
bouillon,0.902591
wheat flour,0.699893


In [12]:
# for vehicles without a scale_over_mean value, we'll assign the average of the others (row-wise)
r = r.reset_index().append(pd.DataFrame([(i,gday.scale_over_mean.mean()) for i in ['maize flour', 'wheat(not specifically flour)','salt', 'rice']],
            columns = ['vehicle','r']))
r

Unnamed: 0,vehicle,r
0,bouillon,0.902591
1,wheat flour,0.699893
0,maize flour,0.819483
1,wheat(not specifically flour),0.819483
2,salt,0.819483
3,rice,0.819483


In [13]:
# add uncertainty
gday = gday.merge(r, on = 'vehicle', how = 'outer')
gday['lower'] = gday.value_mean - (gday.r * gday.value_mean)/2
gday['upper'] = gday.value_mean + (gday.r * gday.value_mean)/2

In [14]:
gday[(gday.location_name.isin(location_names)) & (gday.vehicle.isin(vehicles))].groupby(['location_name','vehicle']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id,subnational_location_id,value_mean,value_025_percentile,value_975_percentile,scale_over_mean,r,lower,upper
location_name,vehicle,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Burkina Faso,maize flour,201,,140.6004,,,,0.819483,82.990551,198.210249
Burkina Faso,wheat flour,201,,26.888824,,,,0.699893,17.479179,36.298468
Kenya,maize flour,180,,226.2025,,,,0.819483,133.5179,318.8871
Kenya,wheat flour,180,,75.837917,,,,0.699893,49.298719,102.377115
Myanmar,maize flour,15,,4.52,,,,0.819483,2.667967,6.372033
Myanmar,wheat flour,15,,13.8376,,,,0.699893,8.995183,18.680017
Nepal,wheat flour,-1,,108.072143,,,,0.699893,70.252697,145.891589


In [15]:
gday[(gday.location_name=="Nepal")].value_description.unique()

array(['Mean per capita consumption among consumers (g/day)',
       'Mean per capita consumption (g/day)'], dtype=object)

In [16]:
## dicts for var cleaning

value_d_to_metric = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'median',
 'Mean per capita consumption (g/day)': 'mean',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'mean',
 'Mean micronutrient intake per capita (mg/day)': 'mean',
 'Daily per capita consumption (g)': 'CHECK',
 'Daily consumption (mg/d)': 'CHECK',
 'Consumption per person per day (g)': 'CHECK',
 'Estimated daily contribution from fortified foods (mg/d)': 'CHECK',
 'kg/capita/year': 'CHECK',
 'Mean per capita consumption among consumers (g/day)':'mean'
}

value_d_to_entity = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'vehicle',
 'Mean per capita consumption (g/day)': 'CHECK',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'vehicle',
 'Mean micronutrient intake per capita (mg/day)': 'nutrient',
 'Daily per capita consumption (g)': 'CHECK',
 'Daily consumption (mg/d)': 'CHECK',
 'Consumption per person per day (g)': 'CHECK',
 'Estimated daily contribution from fortified foods (mg/d)': 'CHECK',
 'kg/capita/year': 'CHECK',
 'Mean per capita consumption among consumers (g/day)':'CHECK'
}

value_d_to_mass_unit = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'g',
 'Mean per capita consumption (g/day)': 'g',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'g',
 'Mean micronutrient intake per capita (mg/day)': 'mg',
 'Daily per capita consumption (g)': 'g',
 'Daily consumption (mg/d)': 'mg',
 'Consumption per person per day (g)': 'g',
 'Estimated daily contribution from fortified foods (mg/d)': 'mg',
 'kg/capita/year': 'kg',
 'Mean per capita consumption among consumers (g/day)':'g'
}

value_d_to_time_unit = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'day',
 'Mean per capita consumption (g/day)': 'day',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'day',
 'Mean micronutrient intake per capita (mg/day)': 'day',
 'Daily per capita consumption (g)': 'day',
 'Daily consumption (mg/d)': 'day',
 'Consumption per person per day (g)': 'day',
 'Estimated daily contribution from fortified foods (mg/d)': 'day',
 'kg/capita/year': 'year',
 'Mean per capita consumption among consumers (g/day)':'day'
}

value_d_to_population = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'consumers',
 'Mean per capita consumption (g/day)': 'capita CHECK',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'consumers',
 'Mean micronutrient intake per capita (mg/day)': 'capita CHECK',
 'Daily per capita consumption (g)': 'capita CHECK',
 'Daily consumption (mg/d)': 'CHECK',
 'Consumption per person per day (g)': 'CHECK',
 'Estimated daily contribution from fortified foods (mg/d)': 'CHECK',
 'kg/capita/year': 'CHECK',
 'Mean per capita consumption among consumers (g/day)':'consumers'
}

In [17]:
def format_value_d(df):
    df['metric'] = df.value_description.map(value_d_to_metric)
    df['entity'] = df.value_description.map(value_d_to_entity)
    df['mass_unit'] = df.value_description.map(value_d_to_mass_unit)
    df['time_unit'] = df.value_description.map(value_d_to_time_unit)
    df['pop_denom'] = df.value_description.map(value_d_to_population)
    
    return df

In [18]:
location_names

['Kenya', 'Burkina Faso', 'Myanmar', 'Vietnam', 'Nepal']

In [19]:
viewcols = ['location_name','subnational_name', 'metric', 'entity',
            'mass_unit', 'time_unit', 'pop_denom','vehicle',
            'value_mean','value_025_percentile','value_975_percentile',
            'sub_population','urbanicity',
            'source_citation', 'source_link', 'source_year', 'source_type']

## Kenya

In [20]:
kenya = gday[gday.location_name=="Kenya"]
kenya = format_value_d(kenya)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [21]:
kenya

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,CI validated,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom
61,180,Kenya,na,,unknown,wheat flour,na,Mean per capita consumption (g/day),46.0,,...,,,0.699893,29.90247,62.09753,mean,CHECK,g,day,capita CHECK
62,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),100.11,,...,,,0.699893,65.076877,135.143123,mean,CHECK,g,day,capita CHECK
362,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),53.0,,...,,,0.699893,34.452846,71.547154,mean,CHECK,g,day,capita CHECK
363,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),69.0,,...,,,0.699893,44.853706,93.146294,mean,CHECK,g,day,capita CHECK
364,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),57.0,,...,,,0.699893,37.053061,76.946939,mean,CHECK,g,day,capita CHECK
365,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),63.0,,...,,,0.699893,40.953383,85.046617,mean,CHECK,g,day,capita CHECK
366,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),68.0,,...,,,0.699893,44.203652,91.796348,mean,CHECK,g,day,capita CHECK
367,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),71.0,,...,,,0.699893,46.153813,95.846187,mean,CHECK,g,day,capita CHECK
368,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),75.0,,...,,,0.699893,48.754028,101.245972,mean,CHECK,g,day,capita CHECK
369,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),69.0,,...,,,0.699893,44.853706,93.146294,mean,CHECK,g,day,capita CHECK


In [22]:
kenya.vehicle.unique()

array(['wheat flour', 'maize flour', 'wheat(not specifically flour)'],
      dtype=object)

In [23]:
kenya_wheat = kenya[(kenya.vehicle=="wheat flour")]
kenya_maize = kenya[(kenya.vehicle=="maize flour")]

In [24]:
kenya_wheat[viewcols]

Unnamed: 0,location_name,subnational_name,metric,entity,mass_unit,time_unit,pop_denom,vehicle,value_mean,value_025_percentile,value_975_percentile,sub_population,urbanicity,source_citation,source_link,source_year,source_type
61,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,46.0,,,total population,unknown,Fortification Handbook: Vitamin and Mineral Fo...,https://www.nutritionintl.org/content/user_fil...,1997-2000,estimated from FAO balance sheets
62,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,100.11,,,total population,mixed/both,GFDx,https://fortificationdata.org/country-fortific...,2017,estimated from FAO balance sheets
362,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,53.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,1995,estimated from FAO balance sheets
363,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,69.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,1996,estimated from FAO balance sheets
364,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,57.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,1997,estimated from FAO balance sheets
365,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,63.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,1998,estimated from FAO balance sheets
366,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,68.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,1999,estimated from FAO balance sheets
367,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,71.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,2000,estimated from FAO balance sheets
368,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,75.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,2001,estimated from FAO balance sheets
369,Kenya,na,mean,CHECK,g,day,capita CHECK,wheat flour,69.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,2002,estimated from FAO balance sheets


In [25]:
for i in kenya_wheat[viewcols].source_citation:
    print(i)

Fortification Handbook: Vitamin and Mineral Fortification of Wheat Flour and Maize Meal. Nutrition International. https://www.nutritionintl.org/learning-resource/fortification-handbook-vitamin-and-mineral-fortification-of-wheat-flour-and-maize-meal/ (accessed March 18, 2021).
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx
GFDx


In [26]:
kenya_wheat.loc[(kenya_wheat.source_year=="2017"),"data_choice_notes"] = "Between forfication handbook from 1997-2000 number (46 g/day) and GFDx number from 2017 (100.11 g/day), used GFDx number, noting that GFDx has 57 g/day for 1997."

output = output.append(kenya_wheat.loc[(kenya_wheat.source_year=="2017")])

kenya_wheat.loc[(kenya_wheat.source_year=="2017")]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
62,180,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),100.11,,...,,0.699893,65.076877,135.143123,mean,CHECK,g,day,capita CHECK,Between forfication handbook from 1997-2000 nu...


In [27]:
kenya_wheat.loc[(kenya_wheat.source_year=="2017"),'source_type']

62    estimated from FAO balance sheets
Name: source_type, dtype: object

In [28]:
kenya_maize.loc[(kenya_maize.source_year.isin(['2017','2007'])),viewcols]

Unnamed: 0,location_name,subnational_name,metric,entity,mass_unit,time_unit,pop_denom,vehicle,value_mean,value_025_percentile,value_975_percentile,sub_population,urbanicity,source_citation,source_link,source_year,source_type
630,Kenya,na,mean,CHECK,g,day,capita CHECK,maize flour,216.68,,,total population,mixed/both,GFDx,https://fortificationdata.org/country-fortific...,2017,estimated from FAO balance sheets
631,Kenya,na,mean,CHECK,g,day,capita CHECK,maize flour,221.7,,,total population,mixed/both,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,2007,estimated from FAO balance sheets
797,Kenya,na,mean,CHECK,g,day,capita CHECK,maize flour,216.0,,,total population,mixed/both,GFDx,https://fortificationdata.org/full-gfdx-datasets/,2007,estimated from FAO balance sheets


In [29]:
kenya_maize.loc[(kenya_maize.source_year.isin(['2017'])),viewcols]

Unnamed: 0,location_name,subnational_name,metric,entity,mass_unit,time_unit,pop_denom,vehicle,value_mean,value_025_percentile,value_975_percentile,sub_population,urbanicity,source_citation,source_link,source_year,source_type
630,Kenya,na,mean,CHECK,g,day,capita CHECK,maize flour,216.68,,,total population,mixed/both,GFDx,https://fortificationdata.org/country-fortific...,2017,estimated from FAO balance sheets


In [30]:
kenya_maize.loc[(kenya_maize.source_year.isin(['2017'])),"data_choice_notes"] = "Discarded 2007 Nuss paper that estimated 221.7 g/day from FAO balance sheets, and kept GFDx 2017 estimate of 216.68 g/day, also estimated from FAO balance sheets "

output = output.append(kenya_maize.loc[(kenya_maize.source_year.isin(['2017']))])

kenya_maize.loc[(kenya_maize.source_year.isin(['2017']))]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
630,180,Kenya,na,,mixed/both,maize flour,na,Mean per capita consumption (g/day),216.68,,...,,0.819483,127.897166,305.462834,mean,CHECK,g,day,capita CHECK,Discarded 2007 Nuss paper that estimated 221.7...


## Burkina Faso

In [31]:
bf = gday[gday.location_name=="Burkina Faso"]
bf = format_value_d(bf)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [32]:
bf.vehicle.unique()

array(['wheat flour', 'maize flour', 'wheat(not specifically flour)',
       'bouillon'], dtype=object)

In [33]:
bf_wheat = bf[(bf.vehicle=="wheat flour")]
bf_maize = bf[(bf.vehicle=="maize flour")]

In [34]:
bf_wheat[['source_citation','source_year','sub_population','value_mean','source_type']]

Unnamed: 0,source_citation,source_year,sub_population,value_mean,source_type
12,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,women of reproductive age,49.0,Survey - cross-sectional
13,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,women of reproductive age,65.0,Survey - cross-sectional
14,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,women of reproductive age,21.0,Survey - cross-sectional
15,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,women of reproductive age,47.0,Survey - cross-sectional
16,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,women of reproductive age,55.0,Survey - cross-sectional
17,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,children 12-36 months,32.0,Survey - cross-sectional
18,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,children 12-36 months,30.0,Survey - cross-sectional
19,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,children 12-36 months,15.0,Survey - cross-sectional
20,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,children 12-36 months,38.0,Survey - cross-sectional
21,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",1999,children 12-36 months,40.0,Survey - cross-sectional


In [35]:
bf_wheat[(bf_wheat.source_year=="2017")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,CI validated,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom
22,201,Burkina Faso,na,,unknown,wheat flour,na,Mean per capita consumption (g/day),37.64,,...,,,0.699893,24.468022,50.811978,mean,CHECK,g,day,capita CHECK


In [36]:
bf_wheat.loc[(bf_wheat.source_year=="2017"),"data_choice_notes"] = "Discarded 1999 Hess and Engle-Stone survey number (15-40 g/day) and a 2002 Sablah total population survey number (14.5 g/day) for a 2017 total-pop FAO balance sheet number (37 g/day), as GFDx estimates 11 g/day for 2002 and 20 g/day for 1999"

output = output.append(bf_wheat.loc[(bf_wheat.source_year=="2017"),])

bf_wheat.loc[(bf_wheat.source_year=="2017"),]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
22,201,Burkina Faso,na,,unknown,wheat flour,na,Mean per capita consumption (g/day),37.64,,...,,0.699893,24.468022,50.811978,mean,CHECK,g,day,capita CHECK,Discarded 1999 Hess and Engle-Stone survey num...


In [37]:
bf_maize[['source_citation','source_year','sub_population','value_mean','source_type']]

Unnamed: 0,source_citation,source_year,sub_population,value_mean,source_type
612,GFDx,2017,total population,210.99,estimated from FAO balance sheets
613,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",2007,total population,151.9,estimated from FAO balance sheets
614,"Ranum, P., Peña‐Rosas, J. P., & Garcia‐Casal, ...",2007-2009,total population,107.0,estimated from FAO balance sheets
763,GFDx,1995,total population,95.0,estimated from FAO balance sheets
764,GFDx,1996,total population,64.0,estimated from FAO balance sheets
765,GFDx,1997,total population,92.0,estimated from FAO balance sheets
766,GFDx,1998,total population,97.0,estimated from FAO balance sheets
767,GFDx,1999,total population,63.0,estimated from FAO balance sheets
768,GFDx,2000,total population,127.0,estimated from FAO balance sheets
769,GFDx,2001,total population,91.0,estimated from FAO balance sheets


In [38]:
bf_maize.loc[(bf_maize.source_year=="2017"),"data_choice_notes"] = "Discarded 2007 total pop Nuss estimate from FAO balanc esheets (152 g/day) and Ranum 2007-2009 estimate from FAO (107 g/day) in lieu of GFDx estimate from FAO balance sheets from 2017, given that GFDx 2007 estimate was 157 g/day"

output = output.append(bf_maize.loc[(bf_maize.source_year=="2017"),])

bf_maize.loc[(bf_maize.source_year=="2017"),]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
612,201,Burkina Faso,na,,unknown,maize flour,na,Mean per capita consumption (g/day),210.99,,...,,0.819483,124.538596,297.441404,mean,CHECK,g,day,capita CHECK,Discarded 2007 total pop Nuss estimate from FA...


## Myanmar

In [39]:
checkcols = ['source_citation','source_year','sub_population','value_mean','source_type']

In [40]:
myanmar = gday[gday.location_name=="Myanmar"]
myanmar = format_value_d(myanmar)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [41]:
myanmar.vehicle.unique()

array(['wheat flour', 'maize flour'], dtype=object)

In [42]:
myWheat = myanmar[(myanmar.vehicle=="wheat flour")]
myMaize = myanmar[(myanmar.vehicle=="maize flour")]

In [43]:
myWheat[checkcols]

Unnamed: 0,source_citation,source_year,sub_population,value_mean,source_type
67,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,7.0,estimated from FAO balance sheets
68,GFDx,2017,total population,29.92,estimated from FAO balance sheets
69,FAO food balance sheet,,women of reproductive age,10.92,estimated from FAO balance sheets
274,GFDx,1995,total population,9.0,estimated from FAO balance sheets
275,GFDx,1996,total population,8.0,estimated from FAO balance sheets
276,GFDx,1997,total population,7.0,estimated from FAO balance sheets
277,GFDx,1998,total population,7.0,estimated from FAO balance sheets
278,GFDx,1999,total population,11.0,estimated from FAO balance sheets
279,GFDx,2000,total population,10.0,estimated from FAO balance sheets
280,GFDx,2001,total population,11.0,estimated from FAO balance sheets


In [44]:
myWheat.loc[(myWheat.source_year=='2017'),"data_choice_notes"] = "Discarded fortification handbok number (7 g/day) from 1997-2000 estimated from FAO balance sheets in lieu of 2017 GFDx number from FAO balance sheets"

output = output.append(myWheat.loc[(myWheat.source_year=='2017'),])

myWheat.loc[(myWheat.source_year=='2017'),]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
68,15,Myanmar,na,,unknown,wheat flour,na,Mean per capita consumption (g/day),29.92,,...,,0.699893,19.449607,40.390393,mean,CHECK,g,day,capita CHECK,Discarded fortification handbok number (7 g/da...


In [45]:
myMaize.columns

Index(['location_id', 'location_name', 'subnational_name',
       'subnational_location_id', 'urbanicity', 'vehicle', 'nutrient',
       'value_description', 'value_mean', 'value_025_percentile',
       'value_975_percentile', 'sub_population', 'source_citation',
       'source_link', 'source_year', 'source_type', 'notes', 'user',
       'date_recorded', 'definition validated', 'CI validated',
       'scale_over_mean', 'r', 'lower', 'upper', 'metric', 'entity',
       'mass_unit', 'time_unit', 'pop_denom'],
      dtype='object')

In [46]:
myMaize[checkcols + ['value_025_percentile','value_975_percentile']]

Unnamed: 0,source_citation,source_year,sub_population,value_mean,source_type,value_025_percentile,value_975_percentile
636,FAO food balance sheet,,women of reproductive age,4.52,estimated from FAO balance sheets,,


In [47]:
myMaize["data_choice_notes"] = "Only one source."

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [48]:
output = output.append(myMaize)

myMaize

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
636,15,Myanmar,na,,unknown,maize flour,na,kg/capita/year,4.52,,...,,0.819483,2.667967,6.372033,CHECK,CHECK,kg,year,CHECK,Only one source.


## Vietnam

In [49]:
vietnam = gday[gday.location_name.isin(['Vietnam','Viet Nam'])]
vietnam = format_value_d(vietnam)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [50]:
vietnam.vehicle.unique()

array(['wheat flour'], dtype=object)

In [51]:
vietnam[checkcols].source_citation.unique()

array(['Improvement of the Vietnamese Diet for Women of Reproductive Age by Micronutrient Fortification of Staples Foods and Condiments. Arnaud Laillou ,Jacques Berger,Bach Mai Le,Van Thuy Pham,Thi Hop Le,Cong Khan Nguyen,Dora Panagides,Fabian Rohner,Frank Wieringa,Regina Moench-Pfanner.Published: November 30, 2012https://doi.org/10.1371/journal.pone.0050538',
       'Fortification Handbook: Vitamin and Mineral Fortification of Wheat Flour and Maize Meal. Nutrition International. https://www.nutritionintl.org/learning-resource/fortification-handbook-vitamin-and-mineral-fortification-of-wheat-flour-and-maize-meal/ (accessed March 18, 2021).',
       'GFDx'], dtype=object)

In [52]:
vietnam[checkcols]

Unnamed: 0,source_citation,source_year,sub_population,value_mean,source_type
93,Improvement of the Vietnamese Diet for Women o...,2010,women of reproductive age,1.3,Survey - cross-sectional
94,Improvement of the Vietnamese Diet for Women o...,2010,women of reproductive age,36.0,Survey - cross-sectional
95,Improvement of the Vietnamese Diet for Women o...,2010,women of reproductive age,33.8,Survey - cross-sectional
96,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,14.0,Survey - other/unknown
97,GFDx,2017,total population,30.36,estimated from FAO balance sheets
340,GFDx,1995,total population,16.0,estimated from FAO balance sheets
341,GFDx,1996,total population,16.0,estimated from FAO balance sheets
342,GFDx,1997,total population,15.0,estimated from FAO balance sheets
343,GFDx,1998,total population,18.0,estimated from FAO balance sheets
344,GFDx,1999,total population,18.0,estimated from FAO balance sheets


In [53]:
vietnam.loc[(vietnam.source_year=="2017"),"data_choice_notes"] = "Discarded 2010 Laillou survey number for WRA (1-34 g/day) and 1997-2000 fortification handbook survey number (14 g/day) in lieu of GFDx 2017 number (30.36 g/day)"

output = output.append(
    vietnam.loc[(vietnam.source_year=="2017"),]
)

vietnam.loc[(vietnam.source_year=="2017"),]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
97,20,Viet Nam,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),30.36,,...,,0.699893,19.735631,40.984369,mean,CHECK,g,day,capita CHECK,Discarded 2010 Laillou survey number for WRA (...


## Nepal

In [54]:
nepal = gday[gday.location_name=="Nepal"]
nepal = format_value_d(nepal)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [55]:
nepal.vehicle.unique()

array(['wheat flour'], dtype=object)

In [56]:
nepal[checkcols]

Unnamed: 0,source_citation,source_year,sub_population,value_mean,source_type
70,Centers for Disease Control and Prevention (CD...,2016,total population,6.1,Survey - cross-sectional
71,Centers for Disease Control and Prevention (CD...,2016,total population,5.6,Survey - cross-sectional
72,Centers for Disease Control and Prevention (CD...,2016,total population,5.7,Survey - cross-sectional
73,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,70.0,estimated from FAO balance sheets
74,GFDx,2017,total population,144.85,estimated from FAO balance sheets
75,Fortifying Flour Where People Eat Rice. Hunger...,2007,total population,104.0,estimated from FAO balance sheets
450,GFDx,1995,total population,100.0,estimated from FAO balance sheets
451,GFDx,1996,total population,101.0,estimated from FAO balance sheets
452,GFDx,1997,total population,107.0,estimated from FAO balance sheets
453,GFDx,1998,total population,104.0,estimated from FAO balance sheets


In [57]:
nepal.columns

Index(['location_id', 'location_name', 'subnational_name',
       'subnational_location_id', 'urbanicity', 'vehicle', 'nutrient',
       'value_description', 'value_mean', 'value_025_percentile',
       'value_975_percentile', 'sub_population', 'source_citation',
       'source_link', 'source_year', 'source_type', 'notes', 'user',
       'date_recorded', 'definition validated', 'CI validated',
       'scale_over_mean', 'r', 'lower', 'upper', 'metric', 'entity',
       'mass_unit', 'time_unit', 'pop_denom'],
      dtype='object')

In [58]:
nepal.loc[(nepal.source_year=="2016"),['urbanicity','vehicle','nutrient','value_description','value_mean','source_type','source_link','user','notes']]

Unnamed: 0,urbanicity,vehicle,nutrient,value_description,value_mean,source_type,source_link,user,notes
70,urban,wheat flour,na,Mean per capita consumption among consumers (g...,6.1,Survey - cross-sectional,http://internal-ghdx.healthdata.org/record/nep...,paulina,Table 17.4 shows the per-capita availability o...
71,rural,wheat flour,na,Mean per capita consumption among consumers (g...,5.6,Survey - cross-sectional,http://internal-ghdx.healthdata.org/record/nep...,paulina,Table 17.4 shows the per-capita availability o...
72,mixed/both,wheat flour,na,Mean per capita consumption among consumers (g...,5.7,Survey - cross-sectional,http://internal-ghdx.healthdata.org/record/nep...,paulina,Table 17.4 shows the per-capita availability o...
471,mixed/both,wheat flour,na,Mean per capita consumption (g/day),143.32,estimated from FAO balance sheets,https://fortificationdata.org/full-gfdx-datasets/,paulina,FAO. Food Supply - Crops Primary Equivalent. I...


In [59]:
nepal.loc[(nepal.urbanicity=="mixed/both") & (nepal.source_type=="Survey - cross-sectional"),
         "data_choice_notes"] = "Discarded GFDx FAO-balance sheet estimate from 2017 (145 g/day) in lieu of 2016 Nepal Micronutrient status survey finding (5.7 g/day)"

output = output.append(
    nepal.loc[(nepal.urbanicity=="mixed/both") & (nepal.source_type=="Survey - cross-sectional")]
)

nepal.loc[(nepal.urbanicity=="mixed/both") & (nepal.source_type=="Survey - cross-sectional")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
72,-1,Nepal,na,,mixed/both,wheat flour,na,Mean per capita consumption among consumers (g...,5.7,,...,,0.699893,3.705306,7.694694,mean,CHECK,g,day,consumers,Discarded GFDx FAO-balance sheet estimate from...


In [60]:
for i in nepal.loc[(nepal.urbanicity=="mixed/both") & (nepal.source_type=="Survey - cross-sectional")].value_description:
    print(i)

Mean per capita consumption among consumers (g/day)


# Format output

In [61]:
group_cols = ['location_id','location_name','sub_population','vehicle','metric','mass_unit','time_unit','pop_denom','source_citation','source_link','data_choice_notes']

In [62]:
loc_metadata = get_locs(location_set_id=35, gbd_round_id=6, decomp_step="step4")

In [63]:
output = output.drop(columns = 'location_id')
output = loc_metadata[['location_id','location_name']].merge(output, on = 'location_name', how = 'right')

In [64]:
output[['location_id','location_name','vehicle','value_mean','definition validated', 'CI validated']]

Unnamed: 0,location_id,location_name,vehicle,value_mean,definition validated,CI validated
0,164,Nepal,wheat flour,5.7,Yes,
1,15,Myanmar,wheat flour,29.92,Yes,
2,15,Myanmar,maize flour,4.52,cannot locate source to verify,
3,20,Viet Nam,wheat flour,30.36,Yes,
4,180,Kenya,wheat flour,100.11,Yes,
5,180,Kenya,maize flour,216.68,Yes,
6,201,Burkina Faso,wheat flour,37.64,Yes,
7,201,Burkina Faso,maize flour,210.99,Yes,


In [66]:
output[['location_id','location_name','vehicle','value_mean','value_025_percentile','value_975_percentile','pop_denom','definition validated', 'CI validated']]

Unnamed: 0,location_id,location_name,vehicle,value_mean,value_025_percentile,value_975_percentile,pop_denom,definition validated,CI validated
0,164,Nepal,wheat flour,5.7,,,consumers,Yes,
1,15,Myanmar,wheat flour,29.92,,,capita CHECK,Yes,
2,15,Myanmar,maize flour,4.52,,,CHECK,cannot locate source to verify,
3,20,Viet Nam,wheat flour,30.36,,,capita CHECK,Yes,
4,180,Kenya,wheat flour,100.11,,,capita CHECK,Yes,
5,180,Kenya,maize flour,216.68,,,capita CHECK,Yes,
6,201,Burkina Faso,wheat flour,37.64,,,capita CHECK,Yes,
7,201,Burkina Faso,maize flour,210.99,,,capita CHECK,Yes,


In [67]:
metric_map = {
    'mean':'mean',
    'CHECK':'mean'
}

pop_denom_map = {
    'capita':'capita',
    'CHECK':'capita',
    'capita CHECK':'capita',
    'consumers':'consumers'
}

output.metric = output.metric.map(metric_map)
output.pop_denom = output.pop_denom.map(pop_denom_map)

In [68]:
output[['location_id','location_name','vehicle','value_mean','pop_denom','definition validated', 'CI validated']]

Unnamed: 0,location_id,location_name,vehicle,value_mean,pop_denom,definition validated,CI validated
0,164,Nepal,wheat flour,5.7,consumers,Yes,
1,15,Myanmar,wheat flour,29.92,capita,Yes,
2,15,Myanmar,maize flour,4.52,capita,cannot locate source to verify,
3,20,Viet Nam,wheat flour,30.36,capita,Yes,
4,180,Kenya,wheat flour,100.11,capita,Yes,
5,180,Kenya,maize flour,216.68,capita,Yes,
6,201,Burkina Faso,wheat flour,37.64,capita,Yes,
7,201,Burkina Faso,maize flour,210.99,capita,Yes,


In [69]:
output[['value_mean'] + group_cols]

Unnamed: 0,value_mean,location_id,location_name,sub_population,vehicle,metric,mass_unit,time_unit,pop_denom,source_citation,source_link,data_choice_notes
0,5.7,164,Nepal,total population,wheat flour,mean,g,day,consumers,Centers for Disease Control and Prevention (CD...,http://internal-ghdx.healthdata.org/record/nep...,Discarded GFDx FAO-balance sheet estimate from...
1,29.92,15,Myanmar,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded fortification handbok number (7 g/da...
2,4.52,15,Myanmar,women of reproductive age,maize flour,mean,kg,year,capita,FAO food balance sheet,,Only one source.
3,30.36,20,Viet Nam,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2010 Laillou survey number for WRA (...
4,100.11,180,Kenya,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Between forfication handbook from 1997-2000 nu...
5,216.68,180,Kenya,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2007 Nuss paper that estimated 221.7...
6,37.64,201,Burkina Faso,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1999 Hess and Engle-Stone survey num...
7,210.99,201,Burkina Faso,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2007 total pop Nuss estimate from FA...


In [74]:
output.columns

Index(['location_id', 'location_name', 'subnational_name',
       'subnational_location_id', 'urbanicity', 'vehicle', 'nutrient',
       'value_description', 'value_mean', 'value_025_percentile',
       'value_975_percentile', 'sub_population', 'source_citation',
       'source_link', 'source_year', 'source_type', 'notes', 'user',
       'date_recorded', 'definition validated', 'CI validated',
       'scale_over_mean', 'r', 'lower', 'upper', 'metric', 'entity',
       'mass_unit', 'time_unit', 'pop_denom', 'data_choice_notes'],
      dtype='object')

In [70]:
output.loc[(output.mass_unit=="kg"),'value_mean'] = output.value_mean * 1_000

output.loc[(output.mass_unit=="kg"),'lower'] = output.lower * 1_000
output.loc[(output.mass_unit=="kg"),'upper'] = output.upper * 1_000

output.loc[(output.mass_unit=="kg"),'value_025_percentile'] = output.value_025_percentile * 1_000
output.loc[(output.mass_unit=="kg"),'value_975_percentile'] = output.value_975_percentile * 1_000

output.loc[(output.mass_unit=="kg"),'mass_unit'] = 'g'

In [71]:
output.loc[(output.time_unit=="year"),'value_mean'] = output.value_mean / 365

output.loc[(output.time_unit=="year"),'lower'] = output.lower / 365
output.loc[(output.time_unit=="year"),'upper'] = output.upper / 365

output.loc[(output.time_unit=="year"),'value_025_percentile'] = output.value_025_percentile / 365
output.loc[(output.time_unit=="year"),'value_975_percentile'] = output.value_975_percentile / 365

output.loc[(output.time_unit=="year"),'time_unit'] = 'day'

In [72]:
output[['value_mean'] + group_cols]

Unnamed: 0,value_mean,location_id,location_name,sub_population,vehicle,metric,mass_unit,time_unit,pop_denom,source_citation,source_link,data_choice_notes
0,5.7,164,Nepal,total population,wheat flour,mean,g,day,consumers,Centers for Disease Control and Prevention (CD...,http://internal-ghdx.healthdata.org/record/nep...,Discarded GFDx FAO-balance sheet estimate from...
1,29.92,15,Myanmar,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded fortification handbok number (7 g/da...
2,12.383562,15,Myanmar,women of reproductive age,maize flour,mean,g,day,capita,FAO food balance sheet,,Only one source.
3,30.36,20,Viet Nam,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2010 Laillou survey number for WRA (...
4,100.11,180,Kenya,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Between forfication handbook from 1997-2000 nu...
5,216.68,180,Kenya,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2007 Nuss paper that estimated 221.7...
6,37.64,201,Burkina Faso,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1999 Hess and Engle-Stone survey num...
7,210.99,201,Burkina Faso,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2007 total pop Nuss estimate from FA...


In [73]:
output[['value_mean','lower','upper'] + group_cols]

Unnamed: 0,value_mean,lower,upper,location_id,location_name,sub_population,vehicle,metric,mass_unit,time_unit,pop_denom,source_citation,source_link,data_choice_notes
0,5.7,3.705306,7.694694,164,Nepal,total population,wheat flour,mean,g,day,consumers,Centers for Disease Control and Prevention (CD...,http://internal-ghdx.healthdata.org/record/nep...,Discarded GFDx FAO-balance sheet estimate from...
1,29.92,19.449607,40.390393,15,Myanmar,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded fortification handbok number (7 g/da...
2,12.383562,2.667967,6.372033,15,Myanmar,women of reproductive age,maize flour,mean,g,day,capita,FAO food balance sheet,,Only one source.
3,30.36,19.735631,40.984369,20,Viet Nam,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2010 Laillou survey number for WRA (...
4,100.11,65.076877,135.143123,180,Kenya,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Between forfication handbook from 1997-2000 nu...
5,216.68,127.897166,305.462834,180,Kenya,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2007 Nuss paper that estimated 221.7...
6,37.64,24.468022,50.811978,201,Burkina Faso,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1999 Hess and Engle-Stone survey num...
7,210.99,124.538596,297.441404,201,Burkina Faso,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2007 total pop Nuss estimate from FA...


In [75]:
output = output.rename(columns = {
    'source_citation':'mean_value_source_citation',
    'source_link':'mean_value_source_link'
})

In [76]:
assert(len(output[output.value_mean >= output.upper])==0), "check upper"
assert(len(output[output.value_mean <= output.lower])==0), "check lower"

# check for missing values

In [78]:
output.loc[(output.location_name=="Viet Nam"),'location_name'] = "Vietnam"

In [79]:
## load legal combos
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'

with open(data_prep_dir + 'lsff_vehicle_nutrient_pairs.pickle', 'rb') as handle:
    vehicle_nutrient_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_country_vehicle_pairs.pickle', 'rb') as handle:
    country_vehicle_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_vehicle_country_pairs.pickle', 'rb') as handle:
    vehicle_country_map = pickle.load(handle)

In [80]:
target_vehicles = [i for i in vehicle_nutrient_map.keys() if 'iron' in vehicle_nutrient_map[i]]
set([i for j in location_names for i in country_vehicle_map[j]])

{'bouillon', 'maize flour', 'oil', 'wheat flour'}

In [81]:
# make sure nothing missing. note we're not interested in oil/iron
output.reset_index().merge(target.reset_index(), on = ['location_name','vehicle'], how = 'outer')

Unnamed: 0,index,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
0,0.0,164.0,Nepal,na,,mixed/both,wheat flour,na,Mean per capita consumption among consumers (g...,5.7,...,,0.699893,3.705306,7.694694,mean,CHECK,g,day,consumers,Discarded GFDx FAO-balance sheet estimate from...
1,1.0,15.0,Myanmar,na,,unknown,wheat flour,na,Mean per capita consumption (g/day),29.92,...,,0.699893,19.449607,40.390393,mean,CHECK,g,day,capita,Discarded fortification handbok number (7 g/da...
2,2.0,15.0,Myanmar,na,,unknown,maize flour,na,kg/capita/year,12.383562,...,,0.819483,2.667967,6.372033,mean,CHECK,g,day,capita,Only one source.
3,3.0,20.0,Vietnam,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),30.36,...,,0.699893,19.735631,40.984369,mean,CHECK,g,day,capita,Discarded 2010 Laillou survey number for WRA (...
4,4.0,180.0,Kenya,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),100.11,...,,0.699893,65.076877,135.143123,mean,CHECK,g,day,capita,Between forfication handbook from 1997-2000 nu...
5,5.0,180.0,Kenya,na,,mixed/both,maize flour,na,Mean per capita consumption (g/day),216.68,...,,0.819483,127.897166,305.462834,mean,CHECK,g,day,capita,Discarded 2007 Nuss paper that estimated 221.7...
6,6.0,201.0,Burkina Faso,na,,unknown,wheat flour,na,Mean per capita consumption (g/day),37.64,...,,0.699893,24.468022,50.811978,mean,CHECK,g,day,capita,Discarded 1999 Hess and Engle-Stone survey num...
7,7.0,201.0,Burkina Faso,na,,unknown,maize flour,na,Mean per capita consumption (g/day),210.99,...,,0.819483,124.538596,297.441404,mean,CHECK,g,day,capita,Discarded 2007 total pop Nuss estimate from FA...
8,,,Burkina Faso,,,,bouillon,,,,...,,,,,,,,,,
9,,,Burkina Faso,,,,oil,,,,...,,,,,,,,,,


In [82]:
#reorder columns
output = output[['location_id', 'location_name', 'sub_population', 'vehicle', 'metric',
       'mass_unit', 'time_unit', 'pop_denom','value_mean', 'lower',
       'upper', 'mean_value_source_citation',
       'mean_value_source_link', 'data_choice_notes']]

In [83]:
output

Unnamed: 0,location_id,location_name,sub_population,vehicle,metric,mass_unit,time_unit,pop_denom,value_mean,lower,upper,mean_value_source_citation,mean_value_source_link,data_choice_notes
0,164,Nepal,total population,wheat flour,mean,g,day,consumers,5.7,3.705306,7.694694,Centers for Disease Control and Prevention (CD...,http://internal-ghdx.healthdata.org/record/nep...,Discarded GFDx FAO-balance sheet estimate from...
1,15,Myanmar,total population,wheat flour,mean,g,day,capita,29.92,19.449607,40.390393,GFDx,https://fortificationdata.org/country-fortific...,Discarded fortification handbok number (7 g/da...
2,15,Myanmar,women of reproductive age,maize flour,mean,g,day,capita,12.383562,2.667967,6.372033,FAO food balance sheet,,Only one source.
3,20,Vietnam,total population,wheat flour,mean,g,day,capita,30.36,19.735631,40.984369,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2010 Laillou survey number for WRA (...
4,180,Kenya,total population,wheat flour,mean,g,day,capita,100.11,65.076877,135.143123,GFDx,https://fortificationdata.org/country-fortific...,Between forfication handbook from 1997-2000 nu...
5,180,Kenya,total population,maize flour,mean,g,day,capita,216.68,127.897166,305.462834,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2007 Nuss paper that estimated 221.7...
6,201,Burkina Faso,total population,wheat flour,mean,g,day,capita,37.64,24.468022,50.811978,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1999 Hess and Engle-Stone survey num...
7,201,Burkina Faso,total population,maize flour,mean,g,day,capita,210.99,124.538596,297.441404,GFDx,https://fortificationdata.org/country-fortific...,Discarded 2007 total pop Nuss estimate from FA...


In [84]:
output = output.sort_values(['location_name','vehicle'])

In [85]:
save_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/gday_tier3_04_01_2021.csv'
output.to_csv(save_path, index = False)

In [86]:
save_path = '/ihme/homes/beatrixh/repos/scratch/gday_tier3_04_01_2021.csv'
output.to_csv(save_path, index = False)