In [1]:
from db_queries import get_population, get_ids
from db_queries import get_location_metadata as get_locs

In [2]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt

# Prep g/day for Tier 4 locs

## Cameroon, Cote d'Ivoire, DRC, Mozambique, Indonesia

In [3]:
## load targets
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'

with open(data_prep_dir + 'lsff_vehicle_nutrient_pairs.pickle', 'rb') as handle:
    vehicle_nutrient_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_country_vehicle_pairs.pickle', 'rb') as handle:
    country_vehicle_map = pickle.load(handle)

In [4]:
gday_path =  '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/gday_extraction_sheet_03_29_2021.csv'
gday = pd.read_csv(gday_path)

assert(len(gday[gday.location_name.isna()])==0), "Some rows missing location name"

In [5]:
output = pd.DataFrame()

In [6]:
country_vehicle_map.keys()

dict_keys(['India', 'Nigeria', 'Ethiopia', 'Democratic Republic of the Congo', 'Indonesia', 'Bangladesh', 'Pakistan', 'Kenya', 'United Republic of Tanzania', 'South Africa', 'Sudan', 'Uganda', 'Myanmar', 'Ghana', 'Egypt', 'Vietnam', 'Nepal', 'Mozambique', 'Cameroon', 'Angola', "Côte d'Ivoire", 'Madagascar', 'Burkina Faso', 'Niger', 'China', nan])

In [7]:
# location_names = ['Pakistan','Bangladesh','United Republic of Tanzania','Uganda','South Africa']
# location_names = ['Kenya', 'Burkina Faso', 'Myanmar', 'Vietnam', 'Nepal']

location_names = ["Cameroon","Côte d'Ivoire","Democratic Republic of the Congo","Mozambique","Indonesia"]
vehicles = ['wheat flour','maize flour','oil']

In [8]:
# these are the vehicles per country we need
target = pd.DataFrame([(loc,v) for loc in location_names for v in country_vehicle_map[loc]],
            columns=['location_name','vehicle']).sort_values(['location_name','vehicle']).set_index(['location_name','vehicle'])

target

location_name,vehicle
Cameroon,bouillon
Cameroon,maize flour
Cameroon,oil
Cameroon,wheat flour
Côte d'Ivoire,bouillon
Côte d'Ivoire,maize flour
Côte d'Ivoire,oil
Côte d'Ivoire,wheat flour
Democratic Republic of the Congo,bouillon
Democratic Republic of the Congo,maize flour


In [9]:
gday.location_id = gday.location_id.fillna(-1).astype(int)

In [10]:
gday.location_name.unique()

array(['Afghanistan', 'Angola', 'Bangladesh', 'Burkina Faso',
       "Côte d'Ivoire", 'Cameroon', 'Chad', 'China ',
       'Democratic Republic of the Congo', 'Egypt', 'Ethiopia', 'Ghana',
       'India', 'Indonesia', 'Kenya', 'Madagascar', 'Mozambique',
       'Myanmar', 'Nepal', 'Niger', 'Nigeria', 'Pakistan', 'Philippines',
       'South Africa', 'Sudan', 'Uganda', 'United Republic of Tanzania',
       'Viet Nam', 'Yemen', 'Zambia'], dtype=object)

In [11]:
# estimate CIs, crude

# clean value_mean
gday.loc[gday.value_mean=='na','value_mean'] = np.nan
gday.value_mean = gday.value_mean.astype(float)

# clean 2.5th %ile
gday.loc[gday.value_025_percentile=='na','value_025_percentile'] = np.nan
gday.value_025_percentile = gday.value_025_percentile.astype(float)

# clean 97.5th %ile
gday.loc[gday.value_975_percentile=='na','value_975_percentile'] = np.nan
gday.value_975_percentile = gday.value_975_percentile.astype(float)

# calc scale_over_mean
gday['scale_over_mean'] = (gday.value_975_percentile - gday.value_025_percentile) / gday.value_mean

#foreach vehicle, average scale_over_mean
r = gday[['vehicle','scale_over_mean']].groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'})

In [12]:
# these are the vehicles for which we have a scale_over_mean estimate
r

Unnamed: 0_level_0,r
vehicle,Unnamed: 1_level_1
bouillon,0.902591
wheat flour,0.699893


In [13]:
# for vehicles without a scale_over_mean value, we'll assign the average of the others (row-wise)
r = r.reset_index().append(pd.DataFrame([(i,gday.scale_over_mean.mean()) for i in ['maize flour', 'wheat(not specifically flour)','salt', 'rice']],
            columns = ['vehicle','r']))
r

Unnamed: 0,vehicle,r
0,bouillon,0.902591
1,wheat flour,0.699893
0,maize flour,0.819483
1,wheat(not specifically flour),0.819483
2,salt,0.819483
3,rice,0.819483


In [14]:
# add uncertainty
gday = gday.merge(r, on = 'vehicle', how = 'outer')
gday['lower'] = gday.value_mean - (gday.r * gday.value_mean)/2
gday['upper'] = gday.value_mean + (gday.r * gday.value_mean)/2

In [15]:
gday[(gday.location_name.isin(location_names)) & (gday.vehicle.isin(vehicles))].groupby(['location_name','vehicle']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id,subnational_location_id,value_mean,value_025_percentile,value_975_percentile,scale_over_mean,r,lower,upper
location_name,vehicle,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Cameroon,maize flour,202,,112.6232,,,,0.819483,66.476777,158.769623
Cameroon,wheat flour,202,,57.609459,57.369231,120.846154,0.855478,0.699893,37.449243,77.769676
Côte d'Ivoire,maize flour,205,,60.940833,,,,0.819483,35.970832,85.910835
Côte d'Ivoire,wheat flour,205,,49.0528,,,,0.699893,31.886954,66.218646
Democratic Republic of the Congo,maize flour,171,,51.9,,,,0.819483,30.634405,73.165595
Democratic Republic of the Congo,wheat flour,171,,24.0,,,,0.699893,15.601289,32.398711
Indonesia,wheat flour,11,,56.570417,,,,0.699893,36.773809,76.367024
Mozambique,maize flour,184,,154.6452,,,,0.819483,91.280611,218.009789
Mozambique,wheat flour,184,,45.149583,,,,0.699893,29.349654,60.949513


In [16]:
## dicts for var cleaning

value_d_to_metric = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'median',
 'Mean per capita consumption (g/day)': 'mean',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'mean',
 'Mean micronutrient intake per capita (mg/day)': 'mean',
 'Daily per capita consumption (g)': 'CHECK',
 'Daily consumption (mg/d)': 'CHECK',
 'Consumption per person per day (g)': 'CHECK',
 'Estimated daily contribution from fortified foods (mg/d)': 'CHECK',
 'kg/capita/year': 'CHECK',
 'Mean per capita consumption among consumers (g/day)':'mean'
}

value_d_to_entity = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'vehicle',
 'Mean per capita consumption (g/day)': 'CHECK',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'vehicle',
 'Mean micronutrient intake per capita (mg/day)': 'nutrient',
 'Daily per capita consumption (g)': 'CHECK',
 'Daily consumption (mg/d)': 'CHECK',
 'Consumption per person per day (g)': 'CHECK',
 'Estimated daily contribution from fortified foods (mg/d)': 'CHECK',
 'kg/capita/year': 'CHECK',
 'Mean per capita consumption among consumers (g/day)':'CHECK'
}

value_d_to_mass_unit = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'g',
 'Mean per capita consumption (g/day)': 'g',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'g',
 'Mean micronutrient intake per capita (mg/day)': 'mg',
 'Daily per capita consumption (g)': 'g',
 'Daily consumption (mg/d)': 'mg',
 'Consumption per person per day (g)': 'g',
 'Estimated daily contribution from fortified foods (mg/d)': 'mg',
 'kg/capita/year': 'kg',
 'Mean per capita consumption among consumers (g/day)':'g'
}

value_d_to_time_unit = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'day',
 'Mean per capita consumption (g/day)': 'day',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'day',
 'Mean micronutrient intake per capita (mg/day)': 'day',
 'Daily per capita consumption (g)': 'day',
 'Daily consumption (mg/d)': 'day',
 'Consumption per person per day (g)': 'day',
 'Estimated daily contribution from fortified foods (mg/d)': 'day',
 'kg/capita/year': 'year',
 'Mean per capita consumption among consumers (g/day)':'day'
}

value_d_to_population = {
 'Median amount of vehicle consumed on previous day among consumers (g/day)': 'consumers',
 'Mean per capita consumption (g/day)': 'capita CHECK',
 'Mean amount of vehicle consumed on previous day among consumers (g/day)': 'consumers',
 'Mean micronutrient intake per capita (mg/day)': 'capita CHECK',
 'Daily per capita consumption (g)': 'capita CHECK',
 'Daily consumption (mg/d)': 'CHECK',
 'Consumption per person per day (g)': 'CHECK',
 'Estimated daily contribution from fortified foods (mg/d)': 'CHECK',
 'kg/capita/year': 'CHECK',
 'Mean per capita consumption among consumers (g/day)':'consumers'
}

In [17]:
def format_value_d(df):
    df['metric'] = df.value_description.map(value_d_to_metric)
    df['entity'] = df.value_description.map(value_d_to_entity)
    df['mass_unit'] = df.value_description.map(value_d_to_mass_unit)
    df['time_unit'] = df.value_description.map(value_d_to_time_unit)
    df['pop_denom'] = df.value_description.map(value_d_to_population)
    
    return df

In [18]:
location_names

['Cameroon',
 "Côte d'Ivoire",
 'Democratic Republic of the Congo',
 'Mozambique',
 'Indonesia']

In [19]:
viewcols = ['location_name','subnational_name', 'metric', 'entity',
            'mass_unit', 'time_unit', 'pop_denom','vehicle',
            'value_mean','value_025_percentile','value_975_percentile',
            'sub_population','urbanicity',
            'source_citation', 'source_link', 'source_year', 'source_type']

In [20]:
qcols = ['value_mean','source_citation','source_year','sub_population','source_type']

## Cameroon

In [21]:
cameroon = gday[gday.location_name=="Cameroon"]
cameroon = format_value_d(cameroon)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [22]:
cameroon.vehicle.unique()

array(['wheat flour', 'maize flour', 'wheat(not specifically flour)',
       'bouillon'], dtype=object)

In [23]:
cameroon_wheat = cameroon[(cameroon.vehicle=="wheat flour")]
cameroon_maize = cameroon[(cameroon.vehicle=="maize flour")]

In [24]:
cameroon_wheat[['value_mean','source_citation','source_year','sub_population']]

Unnamed: 0,value_mean,source_citation,source_year,sub_population
27,79.0,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",2011,women of reproductive age
28,90.0,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",2011,women of reproductive age
29,60.0,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",2011,women of reproductive age
30,96.0,"Engle-Stone R, Nankap M, Ndjebayi AO, Allen LH...",2009,women of reproductive age
31,77.0,"Engle-Stone R, Nankap M, Ndjebayi AO, Allen LH...",2009,12-59 months
32,90.2,"Engle-Stone R, Ndjebayi AO, Nankap M, Brown KH...",2009,women of reproductive age
33,77.0,"Engle-Stone R, Ndjebayi AO, Nankap M, Brown KH...",2009,women of reproductive age
34,88.9,"Engle-Stone R, Ndjebayi AO, Nankap M, Brown KH...",2009,women of reproductive age
35,104.9,"Engle-Stone R, Ndjebayi AO, Nankap M, Brown KH...",2009,women of reproductive age
36,64.5,"Engle-Stone R, Ndjebayi AO, Nankap M, Brown KH...",2009,12-59 months


In [25]:
cameroon_wheat.loc[(cameroon_wheat.source_link=="https://journals.sagepub.com/doi/pdf/10.1177/156482651303400104") & (cameroon_wheat.urbanicity=="mixed/both") & (cameroon_wheat.sub_population=="women of reproductive age"),'data_choice_notes'] = "Took most recent survey data (Hess, 2011) among WRA (79 g/day/consumer). Discarded GFDx FAO-based estimates (64 g/day/capita) and 2009 Engle-Stone survey estimate (77-104 g/day)"

output = output.append(
    cameroon_wheat.loc[(cameroon_wheat.source_link=="https://journals.sagepub.com/doi/pdf/10.1177/156482651303400104") & (cameroon_wheat.urbanicity=="mixed/both") & (cameroon_wheat.sub_population=="women of reproductive age")]
)

cameroon_wheat.loc[(cameroon_wheat.source_link=="https://journals.sagepub.com/doi/pdf/10.1177/156482651303400104") & (cameroon_wheat.urbanicity=="mixed/both") & (cameroon_wheat.sub_population=="women of reproductive age")]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
27,202,Cameroon,na,,mixed/both,wheat flour,na,Median amount of vehicle consumed on previous ...,79.0,11.0,...,2.772152,0.699893,51.354243,106.645757,median,vehicle,g,day,consumers,"Took most recent survey data (Hess, 2011) amon..."


In [26]:
cameroon_maize[['value_mean','source_citation','source_year','sub_population','source_type']]

Unnamed: 0,value_mean,source_citation,source_year,sub_population,source_type
617,96.0,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,estimated from FAO balance sheets
618,139.34,GFDx,2017,total population,estimated from FAO balance sheets
619,121.9,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",2007,total population,estimated from FAO balance sheets
807,105.0,GFDx,1995,total population,estimated from FAO balance sheets
808,110.0,GFDx,1996,total population,estimated from FAO balance sheets
809,106.0,GFDx,1997,total population,estimated from FAO balance sheets
810,107.0,GFDx,1998,total population,estimated from FAO balance sheets
811,135.0,GFDx,1999,total population,estimated from FAO balance sheets
812,118.0,GFDx,2000,total population,estimated from FAO balance sheets
813,97.0,GFDx,2001,total population,estimated from FAO balance sheets


In [27]:
cameroon_maize.loc[(cameroon_maize.source_year=="2017"),"data_choice_notes"] = "Kept 2017 GFDx number (139 g/day), discarded 2007 Nuss FAO balance estimate (122 g/day) and 1997-2000 FAO balance estimate (96 g/day)"

output = output.append(
    cameroon_maize.loc[(cameroon_maize.source_year=="2017")]
)

cameroon_maize.loc[(cameroon_maize.source_year=="2017")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
618,202,Cameroon,na,,mixed/both,maize flour,na,Mean per capita consumption (g/day),139.34,,...,,0.819483,82.24659,196.43341,mean,CHECK,g,day,capita CHECK,"Kept 2017 GFDx number (139 g/day), discarded 2..."


## Côte d'Ivoire

In [28]:
civoire = gday[gday.location_name=="Côte d'Ivoire"]
civoire = format_value_d(civoire)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [29]:
civoire.vehicle.unique()

array(['wheat flour', 'maize flour', 'wheat(not specifically flour)',
       'bouillon'], dtype=object)

In [30]:
civoire_wheat = civoire[(civoire.vehicle=="wheat flour")]
civoire_maize = civoire[(civoire.vehicle=="maize flour")]

In [31]:
civoire_wheat[['value_mean','source_citation','source_year','sub_population','source_type']]

Unnamed: 0,value_mean,source_citation,source_year,sub_population,source_type
24,33.0,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,estimated from FAO balance sheets
25,70.08,GFDx,2017,total population,estimated from FAO balance sheets
26,33.7,"Sablah M, Klopp J, Steinberg D, Touaoro Z, Lai...",2002,total population,Survey - cross-sectional
406,44.0,GFDx,1995,total population,estimated from FAO balance sheets
407,43.0,GFDx,1996,total population,estimated from FAO balance sheets
408,39.0,GFDx,1997,total population,estimated from FAO balance sheets
409,50.0,GFDx,1998,total population,estimated from FAO balance sheets
410,49.0,GFDx,1999,total population,estimated from FAO balance sheets
411,46.0,GFDx,2000,total population,estimated from FAO balance sheets
412,47.0,GFDx,2001,total population,estimated from FAO balance sheets


In [32]:
civoire_wheat.loc[(civoire_wheat.source_year=="2017"),"data_choice_notes"] = "Kept 2017 GFDx number (70 g/day), discarded Sablah 2002 survey number (33.7 g/day, noting that GFDx estimated 48 g/day for 2002), and discarded and 1997-2000 Forfitifation handbook FAO balance sheet number (33 g/day)"

output = output.append(
    civoire_wheat.loc[(civoire_wheat.source_year=="2017")]
)

civoire_wheat.loc[(civoire_wheat.source_year=="2017")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
25,205,Côte d'Ivoire,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),70.08,,...,,0.699893,45.555764,94.604236,mean,CHECK,g,day,capita CHECK,"Kept 2017 GFDx number (70 g/day), discarded Sa..."


In [33]:
civoire_maize[qcols]

Unnamed: 0,value_mean,source_citation,source_year,sub_population,source_type
615,56.05,GFDx,2017,total population,estimated from FAO balance sheets
616,43.9,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",2007,total population,estimated from FAO balance sheets
829,76.0,GFDx,1995,total population,estimated from FAO balance sheets
830,75.0,GFDx,1996,total population,estimated from FAO balance sheets
831,72.0,GFDx,1997,total population,estimated from FAO balance sheets
832,72.0,GFDx,1998,total population,estimated from FAO balance sheets
833,65.0,GFDx,1999,total population,estimated from FAO balance sheets
834,63.0,GFDx,2000,total population,estimated from FAO balance sheets
835,64.0,GFDx,2001,total population,estimated from FAO balance sheets
836,61.0,GFDx,2002,total population,estimated from FAO balance sheets


In [34]:
civoire_maize.loc[(civoire_maize.source_link=="https://doi.org/10.3945/an.110.000182"),"data_choice_notes"] = "Discarded 2007 Nuss FAO balance sheet estimate (44 g/day) for 2017 GFDx FAO balance sheet estimate (56 g/day)"

output = output.append(
    civoire_maize.loc[(civoire_maize.source_link=="https://doi.org/10.3945/an.110.000182")]
)

civoire_maize.loc[(civoire_maize.source_link=="https://doi.org/10.3945/an.110.000182")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
616,205,Côte d'Ivoire,,,,maize flour,na,Mean per capita consumption (g/day),43.9,,...,,0.819483,25.912339,61.887661,mean,CHECK,g,day,capita CHECK,Discarded 2007 Nuss FAO balance sheet estimate...


## Democratic Republic of the Congo

In [35]:
drc = gday[gday.location_name=="Democratic Republic of the Congo"]
drc = format_value_d(drc)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [36]:
drc.vehicle.unique()

array([nan, 'wheat flour', 'maize flour'], dtype=object)

In [37]:
drc_wheat = drc[drc.vehicle=="wheat flour"]
drc_maize = drc[drc.vehicle=="maize flour"]

In [38]:
drc_wheat[qcols + ['value_description']]

Unnamed: 0,value_mean,source_citation,source_year,sub_population,source_type,value_description
48,14.0,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,estimated from FAO balance sheets,Mean per capita consumption (g/day)
49,34.0,GFDx,2000,total population,estimated from FAO balance sheets,Mean per capita consumption (g/day)


In [39]:
drc_wheat.loc[(drc_wheat.source_year=="2000"),"data_choice_notes"] = "Kept 2000 GFDx number (34 g/day), discarded 1997-2000 fortification handbook number (14 g/day), both from FAO"

output = output.append(
    drc_wheat.loc[(drc_wheat.source_year=="2000")]
)

drc_wheat.loc[(drc_wheat.source_year=="2000")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
49,171,Democratic Republic of the Congo,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),34.0,,...,,0.699893,22.101826,45.898174,mean,CHECK,g,day,capita CHECK,"Kept 2000 GFDx number (34 g/day), discarded 19..."


In [40]:
drc_maize[qcols + ['value_description']]

Unnamed: 0,value_mean,source_citation,source_year,sub_population,source_type,value_description
621,43.0,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,estimated from FAO balance sheets,Mean per capita consumption (g/day)
622,60.8,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",2007,total population,estimated from FAO balance sheets,Mean per capita consumption (g/day)


In [41]:
drc_maize.loc[(drc_maize.source_year=="2007"),"data_choice_notes"] = "Discarded 1997-2000 fortification handbook number (43 g/day) estimated from FAO balance sheets in lieu of 2007 Nuss number (60.8 g/day), also from FAO balance sheets"

output = output.append(
    drc_maize.loc[(drc_maize.source_year=="2007")]
)

drc_maize.loc[(drc_maize.source_year=="2007")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
622,171,Democratic Republic of the Congo,na,,mixed/both,maize flour,na,Mean per capita consumption (g/day),60.8,,...,,0.819483,35.887704,85.712296,mean,CHECK,g,day,capita CHECK,Discarded 1997-2000 fortification handbook num...


## Mozambique

In [42]:
moz = gday[gday.location_name=="Mozambique"]
moz = format_value_d(moz)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [43]:
moz.vehicle.unique()

array(['wheat flour', 'maize flour', 'wheat(not specifically flour)'],
      dtype=object)

In [44]:
moz_wheat = moz[(moz.vehicle=="wheat flour")]
moz_maize = moz[(moz.vehicle=="maize flour")]

In [45]:
moz_wheat[qcols]

Unnamed: 0,value_mean,source_citation,source_year,sub_population,source_type
65,28.0,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,estimated from FAO balance sheets
66,65.92,GFDx,2017,total population,estimated from FAO balance sheets
428,41.0,GFDx,1995,total population,estimated from FAO balance sheets
429,39.0,GFDx,1996,total population,estimated from FAO balance sheets
430,32.0,GFDx,1997,total population,estimated from FAO balance sheets
431,36.0,GFDx,1998,total population,estimated from FAO balance sheets
432,31.0,GFDx,1999,total population,estimated from FAO balance sheets
433,37.0,GFDx,2000,total population,estimated from FAO balance sheets
434,36.0,GFDx,2001,total population,estimated from FAO balance sheets
435,42.0,GFDx,2002,total population,estimated from FAO balance sheets


In [46]:
moz_wheat.loc[(moz_wheat.source_year=="2017"),"data_choice_notes"] = "Discarded 1997-2000 fortification handbook number (28 g/day) for 2017 GFDx number (66 g/day), both from FAO balance sheets"

output = output.append(
    moz_wheat.loc[(moz_wheat.source_year=="2017")]
)

moz_wheat.loc[(moz_wheat.source_year=="2017")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
66,184,Mozambique,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),65.92,,...,,0.699893,42.85154,88.98846,mean,CHECK,g,day,capita CHECK,Discarded 1997-2000 fortification handbook num...


In [47]:
moz_maize[qcols]

Unnamed: 0,value_mean,source_citation,source_year,sub_population,source_type
633,108.0,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,estimated from FAO balance sheets
634,180.6,GFDx,2017,total population,estimated from FAO balance sheets
635,140.0,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",2007,total population,estimated from FAO balance sheets
851,132.0,GFDx,1995,total population,estimated from FAO balance sheets
852,147.0,GFDx,1996,total population,estimated from FAO balance sheets
853,154.0,GFDx,1997,total population,estimated from FAO balance sheets
854,151.0,GFDx,1998,total population,estimated from FAO balance sheets
855,170.0,GFDx,1999,total population,estimated from FAO balance sheets
856,173.0,GFDx,2000,total population,estimated from FAO balance sheets
857,166.0,GFDx,2001,total population,estimated from FAO balance sheets


In [48]:
moz_maize.loc[(moz_maize.source_year=="2017"),"data_choice_notes"] = "Discarded 1997-2000 fortification handbook number (108 g/day), along with 2007 Nuss number (140 g/day), in lieu of 2017 GFDx number (180 g/day). All came from FAO balance sheets."

output = output.append(
    moz_maize.loc[(moz_maize.source_year=="2017")]
)

moz_maize.loc[(moz_maize.source_year=="2017")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
634,184,Mozambique,na,,mixed/both,maize flour,na,Mean per capita consumption (g/day),180.6,,...,,0.819483,106.600647,254.599353,mean,CHECK,g,day,capita CHECK,Discarded 1997-2000 fortification handbook num...


## Indonesia

In [49]:
indonesia = gday[gday.location_name=="Indonesia"]
indonesia = format_value_d(indonesia)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

In [50]:
indonesia.vehicle.unique()

array(['wheat flour'], dtype=object)

In [51]:
indonesia[qcols]

Unnamed: 0,value_mean,source_citation,source_year,sub_population,source_type
59,35.0,Fortification Handbook: Vitamin and Mineral Fo...,1997-2000,total population,estimated from FAO balance sheets
60,73.59,GFDx,2017,total population,estimated from FAO balance sheets
296,56.0,GFDx,1995,total population,estimated from FAO balance sheets
297,53.0,GFDx,1996,total population,estimated from FAO balance sheets
298,46.0,GFDx,1997,total population,estimated from FAO balance sheets
299,44.0,GFDx,1998,total population,estimated from FAO balance sheets
300,39.0,GFDx,1999,total population,estimated from FAO balance sheets
301,44.0,GFDx,2000,total population,estimated from FAO balance sheets
302,43.0,GFDx,2001,total population,estimated from FAO balance sheets
303,52.0,GFDx,2002,total population,estimated from FAO balance sheets


In [52]:
indonesia.loc[(indonesia.source_year=="2017"),"data_choice_notes"] = "discarded 1997-2000 fortification handbook number (35 g/day) for 2017 GFDx number (73 g/day). Both came from FAO balance sheets."

output = output.append(
    indonesia.loc[(indonesia.source_year=="2017")]
)

indonesia.loc[(indonesia.source_year=="2017")]

Unnamed: 0,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,value_025_percentile,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
60,11,Indonesia,na,,unknown,wheat flour,na,Mean per capita consumption (g/day),73.59,,...,,0.699893,47.837452,99.342548,mean,CHECK,g,day,capita CHECK,discarded 1997-2000 fortification handbook num...


# Format output

In [53]:
group_cols = ['location_id','location_name','sub_population','vehicle','metric','mass_unit','time_unit','pop_denom','source_citation','source_link','data_choice_notes']

In [54]:
loc_metadata = get_locs(location_set_id=35, gbd_round_id=6, decomp_step="step4")

In [55]:
output = output.drop(columns = 'location_id')
output = loc_metadata[['location_id','location_name']].merge(output, on = 'location_name', how = 'right')

In [56]:
output[['location_id','location_name','vehicle','value_mean','definition validated', 'CI validated']]

Unnamed: 0,location_id,location_name,vehicle,value_mean,definition validated,CI validated
0,11,Indonesia,wheat flour,73.59,Yes,
1,171,Democratic Republic of the Congo,wheat flour,34.0,Yes,
2,171,Democratic Republic of the Congo,maize flour,60.8,Yes,
3,184,Mozambique,wheat flour,65.92,Yes,
4,184,Mozambique,maize flour,180.6,Yes,
5,202,Cameroon,wheat flour,79.0,Yes,Yes
6,202,Cameroon,maize flour,139.34,Yes,
7,205,Côte d'Ivoire,wheat flour,70.08,Yes,
8,205,Côte d'Ivoire,maize flour,43.9,Yes,


In [57]:
metric_map = {
    'mean':'mean',
    'CHECK':'mean'
}

pop_denom_map = {
    'capita':'capita',
    'CHECK':'capita',
    'capita CHECK':'capita',
    'consumers':'consumers'
}

output.metric = output.metric.map(metric_map)
output.pop_denom = output.pop_denom.map(pop_denom_map)

In [58]:
output[['value_mean'] + group_cols]

Unnamed: 0,value_mean,location_id,location_name,sub_population,vehicle,metric,mass_unit,time_unit,pop_denom,source_citation,source_link,data_choice_notes
0,73.59,11,Indonesia,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,discarded 1997-2000 fortification handbook num...
1,34.0,171,Democratic Republic of the Congo,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2000 GFDx number (34 g/day), discarded 19..."
2,60.8,171,Democratic Republic of the Congo,total population,maize flour,mean,g,day,capita,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,Discarded 1997-2000 fortification handbook num...
3,65.92,184,Mozambique,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1997-2000 fortification handbook num...
4,180.6,184,Mozambique,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1997-2000 fortification handbook num...
5,79.0,202,Cameroon,women of reproductive age,wheat flour,,g,day,consumers,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",https://journals.sagepub.com/doi/pdf/10.1177/1...,"Took most recent survey data (Hess, 2011) amon..."
6,139.34,202,Cameroon,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2017 GFDx number (139 g/day), discarded 2..."
7,70.08,205,Côte d'Ivoire,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2017 GFDx number (70 g/day), discarded Sa..."
8,43.9,205,Côte d'Ivoire,total population,maize flour,mean,g,day,capita,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,Discarded 2007 Nuss FAO balance sheet estimate...


In [59]:
output.loc[(output.mass_unit=="kg"),'value_mean'] = output.value_mean * 1_000

output.loc[(output.mass_unit=="kg"),'lower'] = output.lower * 1_000
output.loc[(output.mass_unit=="kg"),'upper'] = output.upper * 1_000

output.loc[(output.mass_unit=="kg"),'value_025_percentile'] = output.value_025_percentile * 1_000
output.loc[(output.mass_unit=="kg"),'value_975_percentile'] = output.value_975_percentile * 1_000

output.loc[(output.mass_unit=="kg"),'mass_unit'] = 'g'

In [60]:
output.loc[(output.time_unit=="year"),'value_mean'] = output.value_mean / 365

output.loc[(output.time_unit=="year"),'lower'] = output.lower / 365
output.loc[(output.time_unit=="year"),'upper'] = output.upper / 365

output.loc[(output.time_unit=="year"),'value_025_percentile'] = output.value_025_percentile / 365
output.loc[(output.time_unit=="year"),'value_975_percentile'] = output.value_975_percentile / 365

output.loc[(output.time_unit=="year"),'time_unit'] = 'day'

In [61]:
output[['value_mean'] + group_cols]

Unnamed: 0,value_mean,location_id,location_name,sub_population,vehicle,metric,mass_unit,time_unit,pop_denom,source_citation,source_link,data_choice_notes
0,73.59,11,Indonesia,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,discarded 1997-2000 fortification handbook num...
1,34.0,171,Democratic Republic of the Congo,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2000 GFDx number (34 g/day), discarded 19..."
2,60.8,171,Democratic Republic of the Congo,total population,maize flour,mean,g,day,capita,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,Discarded 1997-2000 fortification handbook num...
3,65.92,184,Mozambique,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1997-2000 fortification handbook num...
4,180.6,184,Mozambique,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1997-2000 fortification handbook num...
5,79.0,202,Cameroon,women of reproductive age,wheat flour,,g,day,consumers,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",https://journals.sagepub.com/doi/pdf/10.1177/1...,"Took most recent survey data (Hess, 2011) amon..."
6,139.34,202,Cameroon,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2017 GFDx number (139 g/day), discarded 2..."
7,70.08,205,Côte d'Ivoire,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2017 GFDx number (70 g/day), discarded Sa..."
8,43.9,205,Côte d'Ivoire,total population,maize flour,mean,g,day,capita,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,Discarded 2007 Nuss FAO balance sheet estimate...


In [62]:
output[['value_mean','lower','upper'] + group_cols]

Unnamed: 0,value_mean,lower,upper,location_id,location_name,sub_population,vehicle,metric,mass_unit,time_unit,pop_denom,source_citation,source_link,data_choice_notes
0,73.59,47.837452,99.342548,11,Indonesia,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,discarded 1997-2000 fortification handbook num...
1,34.0,22.101826,45.898174,171,Democratic Republic of the Congo,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2000 GFDx number (34 g/day), discarded 19..."
2,60.8,35.887704,85.712296,171,Democratic Republic of the Congo,total population,maize flour,mean,g,day,capita,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,Discarded 1997-2000 fortification handbook num...
3,65.92,42.85154,88.98846,184,Mozambique,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1997-2000 fortification handbook num...
4,180.6,106.600647,254.599353,184,Mozambique,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1997-2000 fortification handbook num...
5,79.0,51.354243,106.645757,202,Cameroon,women of reproductive age,wheat flour,,g,day,consumers,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",https://journals.sagepub.com/doi/pdf/10.1177/1...,"Took most recent survey data (Hess, 2011) amon..."
6,139.34,82.24659,196.43341,202,Cameroon,total population,maize flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2017 GFDx number (139 g/day), discarded 2..."
7,70.08,45.555764,94.604236,205,Côte d'Ivoire,total population,wheat flour,mean,g,day,capita,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2017 GFDx number (70 g/day), discarded Sa..."
8,43.9,25.912339,61.887661,205,Côte d'Ivoire,total population,maize flour,mean,g,day,capita,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,Discarded 2007 Nuss FAO balance sheet estimate...


In [63]:
output = output.rename(columns = {
    'source_citation':'mean_value_source_citation',
    'source_link':'mean_value_source_link'
})

In [64]:
assert(len(output[output.value_mean >= output.upper])==0), "check upper"
assert(len(output[output.value_mean <= output.lower])==0), "check lower"

# check for missing values

In [65]:
output.loc[(output.location_name=="Viet Nam"),'location_name'] = "Vietnam"

In [66]:
## load legal combos
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'

with open(data_prep_dir + 'lsff_vehicle_nutrient_pairs.pickle', 'rb') as handle:
    vehicle_nutrient_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_country_vehicle_pairs.pickle', 'rb') as handle:
    country_vehicle_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_vehicle_country_pairs.pickle', 'rb') as handle:
    vehicle_country_map = pickle.load(handle)

In [67]:
target_vehicles = [i for i in vehicle_nutrient_map.keys() if 'iron' in vehicle_nutrient_map[i]]
set([i for j in location_names for i in country_vehicle_map[j]])

{'bouillon', 'maize flour', 'oil', 'wheat flour'}

In [68]:
# make sure nothing missing. note we're not interested in oil/iron
output.reset_index().merge(target.reset_index(), on = ['location_name','vehicle'], how = 'outer')

Unnamed: 0,index,location_id,location_name,subnational_name,subnational_location_id,urbanicity,vehicle,nutrient,value_description,value_mean,...,scale_over_mean,r,lower,upper,metric,entity,mass_unit,time_unit,pop_denom,data_choice_notes
0,0.0,11.0,Indonesia,na,,unknown,wheat flour,na,Mean per capita consumption (g/day),73.59,...,,0.699893,47.837452,99.342548,mean,CHECK,g,day,capita,discarded 1997-2000 fortification handbook num...
1,1.0,171.0,Democratic Republic of the Congo,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),34.0,...,,0.699893,22.101826,45.898174,mean,CHECK,g,day,capita,"Kept 2000 GFDx number (34 g/day), discarded 19..."
2,2.0,171.0,Democratic Republic of the Congo,na,,mixed/both,maize flour,na,Mean per capita consumption (g/day),60.8,...,,0.819483,35.887704,85.712296,mean,CHECK,g,day,capita,Discarded 1997-2000 fortification handbook num...
3,3.0,184.0,Mozambique,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),65.92,...,,0.699893,42.85154,88.98846,mean,CHECK,g,day,capita,Discarded 1997-2000 fortification handbook num...
4,4.0,184.0,Mozambique,na,,mixed/both,maize flour,na,Mean per capita consumption (g/day),180.6,...,,0.819483,106.600647,254.599353,mean,CHECK,g,day,capita,Discarded 1997-2000 fortification handbook num...
5,5.0,202.0,Cameroon,na,,mixed/both,wheat flour,na,Median amount of vehicle consumed on previous ...,79.0,...,2.772152,0.699893,51.354243,106.645757,,vehicle,g,day,consumers,"Took most recent survey data (Hess, 2011) amon..."
6,6.0,202.0,Cameroon,na,,mixed/both,maize flour,na,Mean per capita consumption (g/day),139.34,...,,0.819483,82.24659,196.43341,mean,CHECK,g,day,capita,"Kept 2017 GFDx number (139 g/day), discarded 2..."
7,7.0,205.0,Côte d'Ivoire,na,,mixed/both,wheat flour,na,Mean per capita consumption (g/day),70.08,...,,0.699893,45.555764,94.604236,mean,CHECK,g,day,capita,"Kept 2017 GFDx number (70 g/day), discarded Sa..."
8,8.0,205.0,Côte d'Ivoire,,,,maize flour,na,Mean per capita consumption (g/day),43.9,...,,0.819483,25.912339,61.887661,mean,CHECK,g,day,capita,Discarded 2007 Nuss FAO balance sheet estimate...
9,,,Cameroon,,,,bouillon,,,,...,,,,,,,,,,


In [69]:
#reorder columns
output = output[['location_id', 'location_name', 'sub_population', 'vehicle', 'metric',
       'mass_unit', 'time_unit', 'pop_denom','value_mean', 'lower',
       'upper', 'mean_value_source_citation',
       'mean_value_source_link', 'data_choice_notes']]

In [70]:
output

Unnamed: 0,location_id,location_name,sub_population,vehicle,metric,mass_unit,time_unit,pop_denom,value_mean,lower,upper,mean_value_source_citation,mean_value_source_link,data_choice_notes
0,11,Indonesia,total population,wheat flour,mean,g,day,capita,73.59,47.837452,99.342548,GFDx,https://fortificationdata.org/country-fortific...,discarded 1997-2000 fortification handbook num...
1,171,Democratic Republic of the Congo,total population,wheat flour,mean,g,day,capita,34.0,22.101826,45.898174,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2000 GFDx number (34 g/day), discarded 19..."
2,171,Democratic Republic of the Congo,total population,maize flour,mean,g,day,capita,60.8,35.887704,85.712296,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,Discarded 1997-2000 fortification handbook num...
3,184,Mozambique,total population,wheat flour,mean,g,day,capita,65.92,42.85154,88.98846,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1997-2000 fortification handbook num...
4,184,Mozambique,total population,maize flour,mean,g,day,capita,180.6,106.600647,254.599353,GFDx,https://fortificationdata.org/country-fortific...,Discarded 1997-2000 fortification handbook num...
5,202,Cameroon,women of reproductive age,wheat flour,,g,day,consumers,79.0,51.354243,106.645757,"Hess SY, Brown KH, Sablah M, Engle-Stone R, Aa...",https://journals.sagepub.com/doi/pdf/10.1177/1...,"Took most recent survey data (Hess, 2011) amon..."
6,202,Cameroon,total population,maize flour,mean,g,day,capita,139.34,82.24659,196.43341,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2017 GFDx number (139 g/day), discarded 2..."
7,205,Côte d'Ivoire,total population,wheat flour,mean,g,day,capita,70.08,45.555764,94.604236,GFDx,https://fortificationdata.org/country-fortific...,"Kept 2017 GFDx number (70 g/day), discarded Sa..."
8,205,Côte d'Ivoire,total population,maize flour,mean,g,day,capita,43.9,25.912339,61.887661,"Nuss, E. T., & Tanumihardjo, S. A. (2011). Qua...",https://doi.org/10.3945/an.110.000182,Discarded 2007 Nuss FAO balance sheet estimate...


In [72]:
output = output.sort_values(['location_name','vehicle'])

In [73]:
save_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/gday_tier4_04_02_2021.csv'
output.to_csv(save_path, index = False)

In [74]:
save_path = '/ihme/homes/beatrixh/repos/scratch/gday_tier4_04_02_2021.csv'
output.to_csv(save_path, index = False)