In [1]:
from db_queries import get_population, get_ids
from db_queries import get_location_metadata as get_locs

In [2]:
import pandas as pd, numpy as np

# LSFF: choose population coverage data by hand for tier 3 countries

## vehicles: Wheat flour, maize flour, oil

## countries: Kenya, Burkina Faso, Myanmar, Vietnam, Nepal

In [3]:
nutrients = ['iron','zinc','folic acid','vitamin a']

In [4]:
data_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/extraction_sheet_lsff_03_22_2021.csv'
assm_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/extraction_sheet_lsff_assumed_coverage_03_22_2021.csv'

df = pd.read_csv(data_path)

In [5]:
df.loc[df.nutrient=="folic acid, folate, b9",'nutrient']= 'folic acid'

In [6]:
assum = pd.read_csv(assm_path)

In [7]:
assum.location_name.unique()

array(['Ethiopia', 'Myanmar', 'India'], dtype=object)

In [8]:
assum.loc[assum.nutrient=="folic acid, folate, b9",'nutrient']= 'folic acid'

In [9]:
df['estimation_status'] = 'na'

In [10]:
df['data_choice_notes'] = ""

In [11]:
mult_estimates_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/lsff_data_estimated_03_22_2021.csv'

mult_estimates = pd.read_csv(mult_estimates_path)

In [12]:
#reformat
mult_estimates.loc[(mult_estimates.B_estimate!=1.0),'B'] = np.nan
mult_estimates.loc[(mult_estimates.C_estimate!=1.0),'C'] = np.nan

mult_estimates = pd.melt(mult_estimates,
                         id_vars = ['location_name','vehicle','nutrient','standard'],
                         value_vars = ['B','C'], var_name = 'value_description', value_name = 'value_mean').dropna()

mult_estimates.loc[(mult_estimates.value_description=="B"),'nutrient'] = 'NA'

mult_estimates = mult_estimates.drop_duplicates()

mult_estimates.value_description = mult_estimates.value_description.map({
    'B':'percent of population eating industrially produced vehicle',
    'C':'percent of population eating fortified vehicle'
})

In [13]:
mult_estimates['estimation_status'] = 'multiplicative'

In [14]:
## load legal combos
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'

with open(data_prep_dir + 'lsff_vehicle_nutrient_pairs.pickle', 'rb') as handle:
    vehicle_nutrient_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_country_vehicle_pairs.pickle', 'rb') as handle:
    country_vehicle_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_vehicle_country_pairs.pickle', 'rb') as handle:
    vehicle_country_map = pickle.load(handle)

In [15]:
location_names = ['Kenya','Burkina Faso','Myanmar','Vietnam','Nepal']
vehicles = ['maize flour','wheat flour','oil']
nutrients = ['folic acid','iron','zinc','vitamin a']

In [16]:
# these are the vehicles per country we need
target_high_level = pd.DataFrame([(loc,v) for loc in location_names for v in country_vehicle_map[loc]],
            columns=['location_name','vehicle']).sort_values(['location_name','vehicle'])

target_high_level = target_high_level[target_high_level.vehicle.isin(vehicles)].set_index(['location_name','vehicle'])

target_high_level

location_name,vehicle
Burkina Faso,maize flour
Burkina Faso,oil
Burkina Faso,wheat flour
Kenya,maize flour
Kenya,oil
Kenya,wheat flour
Myanmar,oil
Myanmar,wheat flour
Nepal,oil
Nepal,wheat flour


In [17]:
target_a = pd.DataFrame([(loc,vehicle,nutrient,'percent of population eating fortified vehicle') for loc in location_names
                       for vehicle in country_vehicle_map[loc]
                      for nutrient in vehicle_nutrient_map[vehicle]],
            columns=['location_name','vehicle','nutrient','value_description']).sort_values(['location_name','vehicle','nutrient'])
target_a = target_a[(target_a.nutrient.isin(nutrients))]

target_b = pd.DataFrame([(loc,vehicle,'na',val) for loc in location_names
                       for vehicle in country_vehicle_map[loc]
                      for val in ['percent of population eating industrially produced vehicle',
       'percent of population eating vehicle']],
            columns=['location_name','vehicle','nutrient','value_description'])

sortvars = ['location_name','vehicle','value_description','nutrient']
target = target_a.append(target_b)
target = target[(target.vehicle.isin(vehicles)) & (target.nutrient.isin(nutrients))].sort_values(sortvars).set_index(sortvars)

In [18]:
target

location_name,vehicle,value_description,nutrient
Burkina Faso,maize flour,percent of population eating fortified vehicle,folic acid
Burkina Faso,maize flour,percent of population eating fortified vehicle,iron
Burkina Faso,maize flour,percent of population eating fortified vehicle,vitamin a
Burkina Faso,maize flour,percent of population eating fortified vehicle,zinc
Burkina Faso,oil,percent of population eating fortified vehicle,vitamin a
Burkina Faso,wheat flour,percent of population eating fortified vehicle,folic acid
Burkina Faso,wheat flour,percent of population eating fortified vehicle,iron
Burkina Faso,wheat flour,percent of population eating fortified vehicle,vitamin a
Burkina Faso,wheat flour,percent of population eating fortified vehicle,zinc
Kenya,maize flour,percent of population eating fortified vehicle,folic acid


In [19]:
target = target.reset_index()
rcols = target.columns.tolist()

In [20]:
check_cols = ['location_id','location_name','urbanicity','subnational_name','vehicle','value_description','nutrient','value_mean','value_025_percentile',
       'value_975_percentile','sub_population','source_year','notes','source_citation','source_link','inclusion_justification','included','data_choice_notes']

def filter_data(country, vehicle, val):    
    output = df.loc[(df.location_name==country)
           & (df.vehicle==vehicle)
           & (df.value_description==val)
           & (df.value_mean.notna()),check_cols]
    
    return output

In [21]:
def check_one_country(country):
    vehicles = ['oil', 'wheat flour', 'salt', 'maize flour', 'rice', 'bouillon']
    values_gold = ['percent of population eating fortified vehicle',
               'percent of population eating industrially produced vehicle',
               'percent of population eating vehicle']
    return pd.concat([filter_data(country, vehicle, val) for vehicle in vehicles for val in values_gold])

In [22]:
usecols = ['location_id','location_name','subnational_name','vehicle','value_description','nutrient','value_mean', 'value_025_percentile',
       'value_975_percentile']
subset_data = {}

In [23]:
for i in location_names:
    subset_data[i] = pd.DataFrame()

## Kenya

In [24]:
kenya = check_one_country("Kenya")

kenya.value_description.unique()

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


array(['percent of population eating industrially produced vehicle',
       'percent of population eating vehicle'], dtype=object)

In [25]:
mult_estimates[(mult_estimates.location_name=="Kenya")]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
511,Kenya,wheat flour,iron,Mandatory,percent of population eating fortified vehicle,33.83575,multiplicative
512,Kenya,wheat flour,zinc,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
513,Kenya,wheat flour,folic acid,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
514,Kenya,wheat flour,vitamin b12,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
515,Kenya,wheat flour,vitamin b1,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
516,Kenya,wheat flour,vitamin a,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
518,Kenya,oil,vitamin d,Unknown,percent of population eating fortified vehicle,0.0,multiplicative
519,Kenya,maize flour,iron,Mandatory,percent of population eating fortified vehicle,5.317153,multiplicative
520,Kenya,maize flour,zinc,Mandatory,percent of population eating fortified vehicle,0.0,multiplicative
521,Kenya,maize flour,folic acid,Mandatory,percent of population eating fortified vehicle,0.0,multiplicative


In [26]:
kenya.groupby(['vehicle','value_description']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id
vehicle,value_description,Unnamed: 2_level_1
maize flour,percent of population eating industrially produced vehicle,
maize flour,percent of population eating vehicle,
oil,percent of population eating industrially produced vehicle,
oil,percent of population eating vehicle,
wheat flour,percent of population eating industrially produced vehicle,
wheat flour,percent of population eating vehicle,


In [27]:
kenya.loc[(kenya.vehicle=="maize flour") & (kenya.value_description=="percent of population eating industrially produced vehicle"),
         "data_choice_notes"] = "Only one source."

subset_data["Kenya"] = subset_data['Kenya'].append(kenya[(kenya.vehicle=="maize flour") & (kenya.value_description=="percent of population eating industrially produced vehicle")])

kenya[(kenya.vehicle=="maize flour") & (kenya.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
432,,Kenya,mixed/both,na,maize flour,percent of population eating industrially prod...,na,36.67,,,total population,2005,nationally representative survey: proportion o...,"Central Bureau of Statistics (Kenya), UK Depar...",NID 7375,,,Only one source.


In [28]:
kenya.loc[(kenya.vehicle=="maize flour") 
      & (kenya.value_description=="percent of population eating vehicle")
      & (kenya.source_link=="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066654/"),
     "data_choice_notes"] = "Excluded central bureau of stats total pop number (85%) from 2005 in lieu of Ferguson rural u5 number from 2015. Note 72% of kenyan population is rural according to the world bank: https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=KE"

subset_data["Kenya"] = subset_data["Kenya"].append(
kenya.loc[(kenya.vehicle=="maize flour") 
      & (kenya.value_description=="percent of population eating vehicle")
      & (kenya.source_link=="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066654/")]
)


kenya[(kenya.vehicle=="maize flour") 
      & (kenya.value_description=="percent of population eating vehicle")]


Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
447,,Kenya,mixed/both,na,maize flour,percent of population eating vehicle,na,85.27,,,total population,2005,nationally representative survey: proportion o...,"Central Bureau of Statistics (Kenya), UK Depar...",NID 7375,,,
503,,Kenya,rural,Kitui,maize flour,percent of population eating vehicle,na,57.0,,,6-8 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
504,,Kenya,rural,Kitui,maize flour,percent of population eating vehicle,na,63.0,,,9-11 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
505,,Kenya,rural,Kitui,maize flour,percent of population eating vehicle,na,81.0,,,12-23 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
506,,Kenya,rural,Kitui,maize flour,percent of population eating vehicle,na,16.0,,,6-8 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
507,,Kenya,rural,Kitui,maize flour,percent of population eating vehicle,na,38.0,,,9-11 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
508,,Kenya,rural,Kitui,maize flour,percent of population eating vehicle,na,17.0,,,12-23 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
509,,Kenya,rural,Vihiga,maize flour,percent of population eating vehicle,na,89.0,,,6-8 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
510,,Kenya,rural,Vihiga,maize flour,percent of population eating vehicle,na,91.0,,,9-11 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
511,,Kenya,rural,Vihiga,maize flour,percent of population eating vehicle,na,95.0,,,12-23 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...


In [29]:
# kenya.loc[(kenya.vehicle=="maize flour") & (kenya.value_description=="percent of population eating industrially produced vehicle"),
#          "data_choice_notes"] = "Only one source."

# subset_data["Kenya"] = subset_data['Kenya'].append(kenya[(kenya.vehicle=="maize flour") & (kenya.value_description=="percent of population eating industrially produced vehicle")])

# kenya[(kenya.vehicle=="maize flour") & (kenya.value_description=="percent of population eating industrially produced vehicle")]

In [30]:
kenya.loc[(kenya.vehicle=="oil") 
      & (kenya.value_description=="percent of population eating industrially produced vehicle")
          ,"data_choice_notes"] = "Only one source."

subset_data['Kenya'] = subset_data['Kenya'].append(
kenya[(kenya.vehicle=="oil") 
      & (kenya.value_description=="percent of population eating industrially produced vehicle")]
)

kenya[(kenya.vehicle=="oil") 
      & (kenya.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
445,,Kenya,mixed/both,na,oil,percent of population eating industrially prod...,na,9.39,,,total population,2005,nationally representative survey: proportion o...,"Central Bureau of Statistics (Kenya), UK Depar...",NID 7375,,,Only one source.


In [31]:
kenya.loc[(kenya.vehicle=="oil") 
      & (kenya.value_description=="percent of population eating vehicle")
      & (kenya.source_link=="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066654/"),
     "data_choice_notes"] = "Excluded central bureau of stats total pop number (16%) from 2005 in lieu of Ferguson rural u5 number from 2015. Note 72% of kenyan population is rural according to the world bank: https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=KE"

subset_data["Kenya"] = subset_data["Kenya"].append(
kenya.loc[(kenya.vehicle=="oil") 
      & (kenya.value_description=="percent of population eating vehicle")
      & (kenya.source_link=="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066654/")]
)


kenya[(kenya.vehicle=="oil") 
      & (kenya.value_description=="percent of population eating vehicle")]


Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
449,,Kenya,mixed/both,na,oil,percent of population eating vehicle,na,16.17,,,total population,2005,nationally representative survey: proportion o...,"Central Bureau of Statistics (Kenya), UK Depar...",NID 7375,,,
514,,Kenya,rural,Kitui,oil,percent of population eating vehicle,na,6.0,,,6-8 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
515,,Kenya,rural,Kitui,oil,percent of population eating vehicle,na,16.0,,,9-11 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
516,,Kenya,rural,Kitui,oil,percent of population eating vehicle,na,7.0,,,12-23 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
517,,Kenya,rural,Kitui,oil,percent of population eating vehicle,na,12.0,,,6-8 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
518,,Kenya,rural,Kitui,oil,percent of population eating vehicle,na,38.0,,,9-11 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
519,,Kenya,rural,Kitui,oil,percent of population eating vehicle,na,45.0,,,12-23 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
520,,Kenya,rural,Vihiga,oil,percent of population eating vehicle,na,60.0,,,6-8 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
521,,Kenya,rural,Vihiga,oil,percent of population eating vehicle,na,70.0,,,9-11 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...
522,,Kenya,rural,Vihiga,oil,percent of population eating vehicle,na,72.0,,,12-23 months,2015,population = children who were breastfeeding. ...,"Ferguson, Elaine et al. “Zinc, iron and calciu...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5...,,,Excluded central bureau of stats total pop num...


In [32]:
kenya.loc[(kenya.vehicle=="wheat flour") & (kenya.value_description=="percent of population eating industrially produced vehicle"),
         "data_choice_notes"] = "Only one source."

subset_data["Kenya"] = subset_data['Kenya'].append(kenya[(kenya.vehicle=="wheat flour") & (kenya.value_description=="percent of population eating industrially produced vehicle")])

kenya[(kenya.vehicle=="wheat flour") & (kenya.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
444,,Kenya,mixed/both,na,wheat flour,percent of population eating industrially prod...,na,30.95,,,total population,2005,nationally representative survey: proportion o...,"Central Bureau of Statistics (Kenya), UK Depar...",NID 7375,,,Only one source.


In [33]:
kenya.loc[(kenya.vehicle=="wheat flour") & (kenya.value_description=="percent of population eating vehicle"),
         "data_choice_notes"] = "Only one source."

subset_data["Kenya"] = subset_data['Kenya'].append(kenya[(kenya.vehicle=="wheat flour") & (kenya.value_description=="percent of population eating vehicle")])

kenya[(kenya.vehicle=="wheat flour") & (kenya.value_description=="percent of population eating vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
448,,Kenya,mixed/both,na,wheat flour,percent of population eating vehicle,na,36.58,,,total population,2005,nationally representative survey: proportion o...,"Central Bureau of Statistics (Kenya), UK Depar...",NID 7375,,,Only one source.


In [34]:
#need pct eating fortified for oil, wheat, maize
subset_data["Kenya"] = subset_data['Kenya'].append(
    mult_estimates[(mult_estimates.location_name=="Kenya") & (mult_estimates.nutrient.isin(nutrients))]
)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [35]:
path_oil_estimates = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_fortified_oil_regression_estimates_3_22_2021.csv'
fort_oil = pd.read_csv(path_oil_estimates)

draws = [f'draw_{i}' for i in range(500)]

fort_oil = fort_oil.groupby(['location_name','vehicle']).mean().reset_index()
fort_oil['value_mean'] = fort_oil[draws].mean(axis=1)
fort_oil['value_025_percentile'] = fort_oil[draws].quantile(.025, axis=1)
fort_oil['value_975_percentile'] = fort_oil[draws].quantile(.975, axis=1)

fort_oil = fort_oil[['location_name','vehicle','value_mean']]

In [36]:
fort_oil[fort_oil.location_name=="Kenya"]

Unnamed: 0,location_name,vehicle,value_mean
12,Kenya,oil,0.0


In [37]:
ky_maize_est = pd.DataFrame({
    'location_name':['Kenya'],
    'value_description':['percent of population eating fortified vehicle'],
    'vehicle':['oil'],
    'nutrient':['vitamin a'],
    'value_mean':[0],
    'estimation_status':['regression']
})

subset_data['Kenya'] = subset_data['Kenya'].append(
    ky_maize_est
)

ky_maize_est

Unnamed: 0,location_name,value_description,vehicle,nutrient,value_mean,estimation_status
0,Kenya,percent of population eating fortified vehicle,oil,vitamin a,0,regression


## Myanmar

In [38]:
myanmar = check_one_country("Myanmar")

myanmar.groupby(['vehicle','value_description']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id
vehicle,value_description,Unnamed: 2_level_1
oil,percent of population eating industrially produced vehicle,


In [39]:
target[target.location_name=="Myanmar"]

Unnamed: 0,location_name,vehicle,value_description,nutrient
18,Myanmar,oil,percent of population eating fortified vehicle,vitamin a
19,Myanmar,wheat flour,percent of population eating fortified vehicle,folic acid
20,Myanmar,wheat flour,percent of population eating fortified vehicle,iron
21,Myanmar,wheat flour,percent of population eating fortified vehicle,vitamin a
22,Myanmar,wheat flour,percent of population eating fortified vehicle,zinc


In [40]:
myanmar

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
595,,Myanmar,,na,oil,percent of population eating industrially prod...,na,10,0,20,total population,,,,Flander’s investment trade report,,,


In [41]:
myanmar.loc[(myanmar.vehicle=="oil")
            & (myanmar.value_description=="percent of population eating industrially produced vehicle")
            ,"data_choice_notes"] = "Only one source."
subset_data['Myanmar'] = subset_data['Myanmar'].append(myanmar.loc[(myanmar.vehicle=="oil")
            & (myanmar.value_description=="percent of population eating industrially produced vehicle")])

myanmar.loc[(myanmar.vehicle=="oil")
            & (myanmar.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
595,,Myanmar,,na,oil,percent of population eating industrially prod...,na,10,0,20,total population,,,,Flander’s investment trade report,,,Only one source.


In [42]:
assum.loc[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating fortified vehicle") 
      & (assum.vehicle=="oil")
     & (assum.nutrient.isin(nutrients)),
         "data_choice_notes"] = "Unable to find data; assuming 0; justification: lack of reports on fortification activities."

subset_data['Myanmar'] = subset_data['Myanmar'].append(
assum[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating fortified vehicle") 
      & (assum.vehicle=="oil")
     & (assum.nutrient.isin(nutrients))]
)

assum[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating fortified vehicle") 
      & (assum.vehicle=="oil")
     & (assum.nutrient.isin(nutrients))]

Unnamed: 0,location_id,location_name,vehicle,nutrient,value_description,value_mean,value_025_percentile,value_975_percentile,sub_population,notes,Unnamed: 10,data_choice_notes
3,15,Myanmar,oil,vitamin a,percent of population eating fortified vehicle,0,0,5,total population,Justification - lack of reports on fortificati...,,Unable to find data; assuming 0; justification...


In [43]:
# use regression estimate
assum[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating industrially produced vehicle") 
      & (assum.vehicle=="wheat flour")]

Unnamed: 0,location_id,location_name,vehicle,nutrient,value_description,value_mean,value_025_percentile,value_975_percentile,sub_population,notes,Unnamed: 10,data_choice_notes


In [44]:
# ... take this?
assum.loc[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating vehicle") 
      & (assum.vehicle=="wheat flour"),
         "data_choice_notes"] = "Made assumption due to lack of data. Justification: FAO food balance sheets and general indication that rice is much more common"

subset_data['Myanmar'] = subset_data['Myanmar'].append(
assum[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating vehicle") 
      & (assum.vehicle=="wheat flour")]
)

assum[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating vehicle") 
      & (assum.vehicle=="wheat flour")]

Unnamed: 0,location_id,location_name,vehicle,nutrient,value_description,value_mean,value_025_percentile,value_975_percentile,sub_population,notes,Unnamed: 10,data_choice_notes
8,15,Myanmar,wheat flour,na,percent of population eating vehicle,5,0,10,total population,FAO food balance sheets and general indication...,,Made assumption due to lack of data. Justifica...


In [45]:
assum.loc[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating fortified vehicle") 
      & (assum.vehicle=="wheat flour")
     & (assum.nutrient.isin(nutrients)),
         "data_choice_notes"] = "Unable to find data; assuming 0; justification: lack of reports on fortification activities."

subset_data['Myanmar'] = subset_data['Myanmar'].append(
assum[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating fortified vehicle") 
      & (assum.vehicle=="wheat flour")
     & (assum.nutrient.isin(nutrients))]
)

assum[(assum.location_name=="Myanmar") 
      & (assum.value_description=="percent of population eating fortified vehicle") 
      & (assum.vehicle=="wheat flour")
     & (assum.nutrient.isin(nutrients))]

Unnamed: 0,location_id,location_name,vehicle,nutrient,value_description,value_mean,value_025_percentile,value_975_percentile,sub_population,notes,Unnamed: 10,data_choice_notes
5,15,Myanmar,wheat flour,folic acid,percent of population eating fortified vehicle,0,0,5,total population,Justification - lack of reports on fortificati...,,Unable to find data; assuming 0; justification...
6,15,Myanmar,wheat flour,iron,percent of population eating fortified vehicle,0,0,5,total population,Justification - lack of reports on fortificati...,,Unable to find data; assuming 0; justification...
7,15,Myanmar,wheat flour,zinc,percent of population eating fortified vehicle,0,0,5,total population,Justification - lack of reports on fortificati...,,Unable to find data; assuming 0; justification...


In [46]:
mult_estimates[(mult_estimates.location_name=="Myanmar")]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
200,Myanmar,wheat flour,,Unknown,percent of population eating industrially prod...,4.5,multiplicative
554,Myanmar,wheat flour,vitamin b12,Unknown,percent of population eating fortified vehicle,0.0,multiplicative
555,Myanmar,wheat flour,vitamin b1,Unknown,percent of population eating fortified vehicle,0.0,multiplicative


In [47]:
subset_data['Myanmar'] = subset_data['Myanmar'].append(
    mult_estimates[(mult_estimates.location_name=="Myanmar")
               & (mult_estimates.vehicle=="wheat flour")
               & (mult_estimates.value_description=="percent of population eating industrially produced vehicle")]
)

mult_estimates[(mult_estimates.location_name=="Myanmar")
               & (mult_estimates.vehicle=="wheat flour")
               & (mult_estimates.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
200,Myanmar,wheat flour,,Unknown,percent of population eating industrially prod...,4.5,multiplicative


In [48]:
outputs_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/'

pct_eating = pd.read_csv(outputs_dir + 'pct_eating_regression_estimates_3_15_2021.csv')
pct_ind_prod = pd.read_csv(outputs_dir + 'pct_eating_ind_prod_regression_estimates_3_15_2021.csv')
pct_fortified = pd.read_csv(outputs_dir + 'pct_eating_fortified_regression_estimates_3_15_2021.csv')

In [49]:
pct_eating[pct_eating.location_name=="Myanmar"]

Unnamed: 0,location_name,vehicle,percent_of_population_eating_vehicle
97,Myanmar,oil,50.0
98,Myanmar,oil,50.0


In [50]:
pct_ind_prod[pct_ind_prod.location_name=="Myanmar"]

Unnamed: 0,location_name,vehicle,percent_of_population_eating_industrially_produced_vehicle
15,Myanmar,oil,10.0


In [51]:
pct_fortified[pct_fortified.location_name=="Myanmar"]

Unnamed: 0,location_name,vehicle,percent_of_population_eating_fortified_vehicle
15,Myanmar,oil,0.0


In [52]:
# COME BACK TO MYANMAR THIS IS A MESS

In [53]:
location_names

['Kenya', 'Burkina Faso', 'Myanmar', 'Vietnam', 'Nepal']

In [54]:
path_wheat_estimates = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_fortified_wheat_regression_estimates_3_22_2021.csv'
fort_wheat = pd.read_csv(path_wheat_estimates)

draws = [f'draw_{i}' for i in range(500)]

fort_wheat = fort_wheat.groupby(['location_name','vehicle']).mean().reset_index()
fort_wheat['value_mean'] = fort_wheat[draws].mean(axis=1)
fort_wheat['value_025_percentile'] = fort_wheat[draws].quantile(.025, axis=1)
fort_wheat['value_975_percentile'] = fort_wheat[draws].quantile(.975, axis=1)

fort_wheat = fort_wheat[['location_name','vehicle','value_mean']]

In [55]:
fort_wheat

Unnamed: 0,location_name,vehicle,value_mean
0,Angola,wheat flour,20.670828
1,Bangladesh,wheat flour,0.155875
2,Burkina Faso,wheat flour,12.388563
3,Cameroon,wheat flour,49.062051
4,China,wheat flour,3.575856
5,Côte d'Ivoire,wheat flour,10.599703
6,Democratic Republic of the Congo,wheat flour,0.039875
7,Egypt,wheat flour,22.463039
8,Ethiopia,wheat flour,0.0
9,Ghana,wheat flour,34.920558


In [56]:
my_wheat_est = pd.DataFrame({
    'location_name':['Myanmar'],
    'value_description':['percent of population eating fortified vehicle'],
    'vehicle':['wheat flour'],
    'nutrient':['vitamin a'],
    'value_mean':[0],
    'estimation_status':['regression']
})

subset_data['Myanmar'] = subset_data['Myanmar'].append(
    my_wheat_est
)

my_wheat_est

Unnamed: 0,location_name,value_description,vehicle,nutrient,value_mean,estimation_status
0,Myanmar,percent of population eating fortified vehicle,wheat flour,vitamin a,0,regression


## Vietnam

In [57]:
vietnam = check_one_country("Viet Nam")

vietnam.groupby(['vehicle','value_description']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id
vehicle,value_description,Unnamed: 2_level_1
oil,percent of population eating industrially produced vehicle,
oil,percent of population eating vehicle,
wheat flour,percent of population eating vehicle,


In [58]:
## nathaniel already populated data_choice_notes
vietnam.data_choice_notes = vietnam.inclusion_justification

In [59]:
location_names

['Kenya', 'Burkina Faso', 'Myanmar', 'Vietnam', 'Nepal']

In [60]:
subset_data['Vietnam'] = subset_data['Vietnam'].append(vietnam[(vietnam.vehicle=="oil") & (vietnam.value_description=="percent of population eating industrially produced vehicle")]) 

vietnam[(vietnam.vehicle=="oil") & (vietnam.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
529,,Viet Nam,,na,oil,percent of population eating industrially prod...,na,90,,,total population,2018,"""WHO and UNICEF note that the decree has not b...",,https://www.acutemarketreports.com/report/viet...,Only one source.,True,Only one source.


In [61]:
subset_data['Vietnam'] = subset_data['Vietnam'].append(
vietnam[(vietnam.vehicle=="oil") & (vietnam.value_description=="percent of population eating vehicle") & (vietnam.included==True)]
)

vietnam[(vietnam.vehicle=="oil") & (vietnam.value_description=="percent of population eating vehicle") & (vietnam.included==True)]


Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
532,,Viet Nam,mixed/both,na,oil,percent of population eating vehicle,na,38.3,,,under-5,2010,Under-5 defined as 6-60 months. Has other pote...,"Laillou, A. et al. “An Assessment of the Impac...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3...,Only one source. Used value for all under-5 (3...,True,Only one source. Used value for all under-5 (3...


In [62]:
subset_data['Vietnam'] = subset_data['Vietnam'].append(vietnam[(vietnam.vehicle=="wheat flour") & (vietnam.value_description=="percent of population eating vehicle") & (vietnam.included==True)])

vietnam[(vietnam.vehicle=="wheat flour") & (vietnam.value_description=="percent of population eating vehicle") & (vietnam.included==True)]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
545,,Viet Nam,mixed/both,na,wheat flour,percent of population eating vehicle,na,38.8,,,women of reproductive age,2010,WRA defined as 19-50 years. Has other potentia...,Improvement of the Vietnamese Diet for Women o...,https://journals.plos.org/plosone/article/file...,Only one source. Using value for all WRA (38.8...,True,Only one source. Using value for all WRA (38.8...
561,,Viet Nam,mixed/both,na,wheat flour,percent of population eating vehicle,na,16.4,,,under-5,2010,Under-5 defined as 6-60 months. Has other pote...,"Laillou, A. et al. “An Assessment of the Impac...",https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3...,Only one source. Using value for all under-5 (...,True,Only one source. Using value for all under-5 (...


In [63]:
assum.location_name.unique()

array(['Ethiopia', 'Myanmar', 'India'], dtype=object)

In [64]:
## need regression estimate
mult_estimates[(mult_estimates.location_name=="Vietnam") 
               & (mult_estimates.vehicle=="oil") 
               & mult_estimates.nutrient.isin(nutrients)]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status


In [65]:
subset_data['Vietnam'] = subset_data['Vietnam'].append(
mult_estimates[(mult_estimates.location_name=="Vietnam") 
               & (mult_estimates.value_description=="percent of population eating industrially produced vehicle")
               & (mult_estimates.standard=="Mandatory")]
)

mult_estimates[(mult_estimates.location_name=="Vietnam") 
               & (mult_estimates.value_description=="percent of population eating industrially produced vehicle")
               & (mult_estimates.standard=="Mandatory")]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
316,Vietnam,wheat flour,,Mandatory,percent of population eating industrially prod...,21.93,multiplicative


In [66]:
mult_estimates[(mult_estimates.location_name=="Vietnam") & (mult_estimates.nutrient=="iron")]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
667,Vietnam,wheat flour,iron,Mandatory,percent of population eating fortified vehicle,4.111875,multiplicative


In [67]:
#HARD CODE IN ASSUMING IRON VALUE FOR ALL NUTRIENTS
iron_assumption = pd.concat([mult_estimates[(mult_estimates.location_name=="Vietnam") & (mult_estimates.nutrient=="iron")]]*4)
iron_assumption['nutrient'] = ['iron','folic acid','zinc','vitamin a']

subset_data['Vietnam'] = subset_data['Vietnam'].append(iron_assumption)

iron_assumption

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
667,Vietnam,wheat flour,iron,Mandatory,percent of population eating fortified vehicle,4.111875,multiplicative
667,Vietnam,wheat flour,folic acid,Mandatory,percent of population eating fortified vehicle,4.111875,multiplicative
667,Vietnam,wheat flour,zinc,Mandatory,percent of population eating fortified vehicle,4.111875,multiplicative
667,Vietnam,wheat flour,vitamin a,Mandatory,percent of population eating fortified vehicle,4.111875,multiplicative


In [68]:
vietnam_reg_est = pct_fortified[pct_fortified.location_name=="Viet Nam"]

vietnam_reg_est = vietnam_reg_est.rename(columns={'percent_of_population_eating_fortified_vehicle':'value_mean'})
vietnam_reg_est['value_description'] = 'percent of population eating fortified vehicle'
vietnam_reg_est['nutrient'] = "vitamin a"
vietnam_reg_est['estimation_status'] = "regression"
vietnam_reg_est['location_name'] = "Vietnam"

subset_data['Vietnam'] = subset_data['Vietnam'].append(vietnam_reg_est)
vietnam_reg_est

Unnamed: 0,location_name,vehicle,value_mean,value_description,nutrient,estimation_status
24,Vietnam,oil,0.0,percent of population eating fortified vehicle,vitamin a,regression


## Nepal

In [69]:
nepal = check_one_country("Nepal")

nepal.groupby(['vehicle','value_description']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id
vehicle,value_description,Unnamed: 2_level_1
oil,percent of population eating vehicle,
wheat flour,percent of population eating industrially produced vehicle,
wheat flour,percent of population eating vehicle,


In [70]:
## nathaniel already populated data_choice_notes
nepal.data_choice_notes = nepal.inclusion_justification

In [71]:
subset_data['Nepal'] = subset_data['Nepal'].append(
    nepal[(nepal.included==True)]
)
nepal[(nepal.included==True)]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
1039,,Nepal,mixed/both,na,oil,percent of population eating vehicle,na,99.8,99.8,99.9,total population,2016,Reported Households Used Cooking Oil to Cook o...,GHDx,http://internal-ghdx.healthdata.org/record/nep...,"Keeping overall estimate (99.8%), discarding e...",True,"Keeping overall estimate (99.8%), discarding e..."
948,,Nepal,mixed/both,na,wheat flour,percent of population eating industrially prod...,na,43.0,na,na,total population,2016,Proportion of population consuming industriall...,GFDx,https://fortificationdata.org/country-fortific...,Keeping GFDx number (43%) from Nepal National ...,True,Keeping GFDx number (43%) from Nepal National ...
1037,,Nepal,urban,na,wheat flour,percent of population eating vehicle,na,52.1,45.8,58.4,total population,2016,Specifically Atta Wheat Flour Purchased and Av...,GHDx,http://internal-ghdx.healthdata.org/record/nep...,Using 52.1% of urban population eating Atta (w...,True,Using 52.1% of urban population eating Atta (w...


In [72]:
assum.location_name.unique()

array(['Ethiopia', 'Myanmar', 'India'], dtype=object)

In [73]:
subset_data['Nepal'] = subset_data['Nepal'].append(
    mult_estimates[(mult_estimates.location_name=="Nepal") & (mult_estimates.nutrient.isin(nutrients))]
)

mult_estimates[(mult_estimates.location_name=="Nepal") & (mult_estimates.nutrient.isin(nutrients))]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
558,Nepal,wheat flour,iron,Mandatory,percent of population eating fortified vehicle,7.334332,multiplicative
559,Nepal,wheat flour,folic acid,Mandatory,percent of population eating fortified vehicle,5.165022,multiplicative
560,Nepal,wheat flour,vitamin a,Mandatory,percent of population eating fortified vehicle,5.165022,multiplicative
561,Nepal,wheat flour,zinc,Unknown,percent of population eating fortified vehicle,0.0,multiplicative
564,Nepal,oil,vitamin a,Unknown,percent of population eating fortified vehicle,0.0,multiplicative


In [74]:
nepal_reg_est = pct_ind_prod[(pct_ind_prod.location_name=="Nepal")]

nepal_reg_est = nepal_reg_est.rename(columns={'percent_of_population_eating_industrially_produced_vehicle':'value_mean'})
nepal_reg_est['value_description'] = 'percent of population eating industrially produced vehicle'
nepal_reg_est['nutrient'] = "na"
nepal_reg_est['estimation_status'] = "regression"

subset_data['Nepal'] = subset_data['Nepal'].append(nepal_reg_est)
nepal_reg_est

Unnamed: 0,location_name,vehicle,value_mean,value_description,nutrient,estimation_status
16,Nepal,oil,74.74765,percent of population eating industrially prod...,na,regression


In [75]:
location_names

['Kenya', 'Burkina Faso', 'Myanmar', 'Vietnam', 'Nepal']

## Burkina Faso

In [76]:
bf = check_one_country("Burkina Faso")

bf.groupby(['vehicle','value_description']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id
vehicle,value_description,Unnamed: 2_level_1
bouillon,percent of population eating vehicle,
maize flour,percent of population eating vehicle,
oil,percent of population eating fortified vehicle,
oil,percent of population eating vehicle,
rice,percent of population eating vehicle,
wheat flour,percent of population eating vehicle,


In [77]:
target[target.location_name=="Burkina Faso"]

Unnamed: 0,location_name,vehicle,value_description,nutrient
0,Burkina Faso,maize flour,percent of population eating fortified vehicle,folic acid
1,Burkina Faso,maize flour,percent of population eating fortified vehicle,iron
2,Burkina Faso,maize flour,percent of population eating fortified vehicle,vitamin a
3,Burkina Faso,maize flour,percent of population eating fortified vehicle,zinc
4,Burkina Faso,oil,percent of population eating fortified vehicle,vitamin a
5,Burkina Faso,wheat flour,percent of population eating fortified vehicle,folic acid
6,Burkina Faso,wheat flour,percent of population eating fortified vehicle,iron
7,Burkina Faso,wheat flour,percent of population eating fortified vehicle,vitamin a
8,Burkina Faso,wheat flour,percent of population eating fortified vehicle,zinc


In [78]:
bf[(bf.vehicle=="maize flour")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
758,,Burkina Faso,rural,Gnagna,maize flour,percent of population eating vehicle,na,0.2,,,women,2003,"24h recall during April, postharvest season wh...","Savy, M., Martin-Prével, Y., Traissac, P., Eym...",https://academic.oup.com/jn/article-abstract/1...,,,
759,,Burkina Faso,rural,Gnagna,maize flour,percent of population eating vehicle,na,39.4,,,women,2003,"24h recall during April, postharvest season wh...","Savy, M., Martin-Prével, Y., Traissac, P., Eym...",https://academic.oup.com/jn/article-abstract/1...,,,
768,,Burkina Faso,urban,Ougadougou,maize flour,percent of population eating vehicle,na,39.0,,,total population,2007,two non-consecutive 24h recalls were performed...,"Becquey E, Delpeuch F, Konaté AM, Delsol H, La...",https://pubmed.ncbi.nlm.nih.gov/22017887/,,,
769,,Burkina Faso,urban,Ougadougou,maize flour,percent of population eating vehicle,na,46.0,,,total population,2007,two non-consecutive 24h recalls were performed...,"Becquey E, Delpeuch F, Konaté AM, Delsol H, La...",https://pubmed.ncbi.nlm.nih.gov/22017887/,,,


In [79]:
bf[(bf.vehicle=="maize flour")].source_citation.unique()

array(['Savy, M., Martin-Prével, Y., Traissac, P., Eymard-Duvernay, S., & Delpeuch, F. (2006). Dietary diversity scores and nutritional status of women change during the seasonal food shortage in rural Burkina Faso. The Journal of nutrition, 136(10), 2625-2632. ',
       'Becquey E, Delpeuch F, Konaté AM, Delsol H, Lange M, Zoungrana M, Martin-Prevel Y. Seasonality of the dietary dimension of household food security in urban Burkina Faso. Br J Nutr. 2012 Jun;107(12):1860-70. doi: 10.1017/S0007114511005071. Epub 2011 Oct 10. PMID: 22017887.'],
      dtype=object)

In [80]:
bf[(bf.vehicle=="maize flour")].source_link.unique()

array(['https://academic.oup.com/jn/article-abstract/136/10/2625/4746707',
       'https://pubmed.ncbi.nlm.nih.gov/22017887/'], dtype=object)

In [81]:
bf.value_mean = bf.value_mean.astype(float)

In [82]:
# worldbank says burkina faso is 70% rural https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=BF
urbanicity_weight = {'rural':.7,'urban':.3}

bf_maize = bf[(bf.vehicle=="maize flour")].groupby('urbanicity').mean().reset_index()
bf_maize['urbanicity_weight'] = bf_maize.urbanicity.map(urbanicity_weight)

In [83]:
bf_maize.value_mean = bf_maize.value_mean * bf_maize.urbanicity_weight

In [84]:
bf_maize.value_mean.sum()

26.61

In [85]:
bf_maize_est = pd.DataFrame({
    'location_name':['Burkina Faso'],
    'value_description':['percent of population eating vehicle'],
    'vehicle':['maize flour'],
    'nutrient':['na'],
    'source_year':['2003-2007'],
    'source_citation':['Savy, M., Martin-Prével, Y., Traissac, P., Eymard-Duvernay, S., & Delpeuch, F. (2006). Dietary diversity scores and nutritional status of women change during the seasonal food shortage in rural Burkina Faso. The Journal of nutrition, 136(10), 2625-2632., Becquey E, Delpeuch F, Konaté AM, Delsol H, Lange M, Zoungrana M, Martin-Prevel Y. Seasonality of the dietary dimension of household food security in urban Burkina Faso. Br J Nutr. 2012 Jun;107(12):1860-70. doi: 10.1017/S0007114511005071. Epub 2011 Oct 10. PMID: 22017887.'],
    'source_link':['https://academic.oup.com/jn/article-abstract/136/10/2625/4746707, https://pubmed.ncbi.nlm.nih.gov/22017887/'],
    'value_mean':[bf_maize.value_mean.sum()],
    'data_choice_notes':['We had one urban source, and one rural. We weighted these by the percentage of Burkina Faso that is rural vs urban, using worldbank numbers (70% rural): https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=BF)']
    })

subset_data['Burkina Faso'] = subset_data['Burkina Faso'].append(
    bf_maize_est
)

bf_maize_est

Unnamed: 0,location_name,value_description,vehicle,nutrient,source_year,source_citation,source_link,value_mean,data_choice_notes
0,Burkina Faso,percent of population eating vehicle,maize flour,na,2003-2007,"Savy, M., Martin-Prével, Y., Traissac, P., Eym...",https://academic.oup.com/jn/article-abstract/1...,26.61,"We had one urban source, and one rural. We wei..."


In [86]:
bf[(bf.vehicle=="oil") & (bf.value_description=="percent of population eating fortified vehicle")]

##take more recent number|

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
73,,Burkina Faso,mixed/both,na,oil,percent of population eating fortified vehicle,vitamin a,70.0,na,na,total population,2010,TABLE 2. Estimated coverage of vitamin A–forti...,"Sablah M, Klopp J, Steinberg D, Touaoro Z, Lai...",https://pubmed.ncbi.nlm.nih.gov/23444712/,,,
74,,Burkina Faso,mixed/both,na,oil,percent of population eating fortified vehicle,vitamin a,75.0,na,na,total population,2013,TABLE 2. Estimated coverage of vitamin A–forti...,"Sablah M, Klopp J, Steinberg D, Touaoro Z, Lai...",https://pubmed.ncbi.nlm.nih.gov/23444712/,,,


In [87]:
bf.loc[(bf.vehicle=="oil") & (bf.value_description=="percent of population eating fortified vehicle") & (bf.source_year=='2013'),
      "data_choice_notes"] = "Out of two numbers from the same source, discarded 2010 number (70%) in lieu of 2013 number (75%)"

subset_data['Burkina Faso'] = subset_data['Burkina Faso'].append(bf[(bf.vehicle=="oil") & (bf.value_description=="percent of population eating fortified vehicle") & (bf.source_year=='2013')])

bf[(bf.vehicle=="oil") & (bf.value_description=="percent of population eating fortified vehicle") & (bf.source_year=='2013')]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
74,,Burkina Faso,mixed/both,na,oil,percent of population eating fortified vehicle,vitamin a,75.0,na,na,total population,2013,TABLE 2. Estimated coverage of vitamin A–forti...,"Sablah M, Klopp J, Steinberg D, Touaoro Z, Lai...",https://pubmed.ncbi.nlm.nih.gov/23444712/,,,"Out of two numbers from the same source, disca..."


In [88]:
bf.loc[(bf.vehicle=="oil") & (bf.value_description=="percent of population eating vehicle"),
      "data_choice_notes"] = "discarded 2003-2007 Becquey numbers for total pop (53%) and women (16-30%) in lieu of 2018 u5 survey (92%)"

subset_data['Burkina Faso'] = subset_data['Burkina Faso'].append(
    bf[(bf.vehicle=="oil") & (bf.value_description=="percent of population eating vehicle") & (bf.sub_population=="8-59 months")]
)

bf[(bf.vehicle=="oil") & (bf.value_description=="percent of population eating vehicle") & (bf.sub_population=="8-59 months")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
756,,Burkina Faso,rural,,oil,percent of population eating vehicle,na,92.23,,,8-59 months,2018,"Survey of 514 children, asking who eats ""fats ...",Dietary habits associated with growth developm...,https://nutritionj.biomedcentral.com/articles/...,,,discarded 2003-2007 Becquey numbers for total ...


In [89]:
bf[(bf.vehicle=="wheat flour")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
254,,Burkina Faso,rural,,wheat flour,percent of population eating vehicle,na,52.0,,,women of reproductive age,1999,"This was sampled for ""rural stratum primarily ...","Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
619,,Burkina Faso,rural,,wheat flour,percent of population eating vehicle,na,15.0,,,women of reproductive age,1999,"This was sampled for ""rural stratum primarily ...","Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
723,,Burkina Faso,mixed/both,,wheat flour,percent of population eating vehicle,na,48.0,,,women of reproductive age,1999,Women age 16-45,"Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
724,,Burkina Faso,urban,Ougadougou,wheat flour,percent of population eating vehicle,na,83.0,,,women of reproductive age,1999,Women age 16-45,"Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
744,,Burkina Faso,rural,,wheat flour,percent of population eating vehicle,na,41.0,,,women of reproductive age,1999,"This was sampled for ""rural stratum primarily ...","Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
745,,Burkina Faso,mixed/both,,wheat flour,percent of population eating vehicle,na,57.0,,,children 12-36 months,1999,,"Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
746,,Burkina Faso,urban,Ougadougou,wheat flour,percent of population eating vehicle,na,88.0,,,children 12-36 months,1999,,"Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
747,,Burkina Faso,rural,,wheat flour,percent of population eating vehicle,na,19.0,,,children 12-36 months,1999,"This was sampled for ""rural stratum primarily ...","Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
748,,Burkina Faso,rural,,wheat flour,percent of population eating vehicle,na,58.0,,,children 12-36 months,1999,"This was sampled for ""rural stratum primarily ...","Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,
749,,Burkina Faso,rural,,wheat flour,percent of population eating vehicle,na,62.0,,,children 12-36 months,1999,"This was sampled for ""rural stratum primarily ...","Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,


In [90]:
bf.loc[(bf.vehicle=="wheat flour") & (bf.urbanicity=="mixed/both"),
      "data_choice_notes"] = "Only one source; discarding the urban- and rural- specific estimates, keeping the total estimates for WRA and u5"

subset_data['Burkina Faso'] = subset_data['Burkina Faso'].append(
    bf[(bf.vehicle=="wheat flour") & (bf.urbanicity=="mixed/both")]
)

bf[(bf.vehicle=="wheat flour") & (bf.urbanicity=="mixed/both")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
723,,Burkina Faso,mixed/both,,wheat flour,percent of population eating vehicle,na,48.0,,,women of reproductive age,1999,Women age 16-45,"Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,Only one source; discarding the urban- and rur...
745,,Burkina Faso,mixed/both,,wheat flour,percent of population eating vehicle,na,57.0,,,children 12-36 months,1999,,"Hess, S. Y., Brown, K. H., Sablah, M., Engle-S...",https://journals.sagepub.com/doi/pdf/10.1177/1...,,,Only one source; discarding the urban- and rur...


In [91]:
bf[bf.vehicle.isin(vehicles)].groupby(['vehicle','value_description']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id,value_mean
vehicle,value_description,Unnamed: 2_level_1,Unnamed: 3_level_1
maize flour,percent of population eating vehicle,,31.15
oil,percent of population eating fortified vehicle,,72.5
oil,percent of population eating vehicle,,48.906
wheat flour,percent of population eating vehicle,,52.3


In [92]:
mult_estimates[(mult_estimates.location_name=="Burkina Faso") 
               & (mult_estimates.vehicle.isin(vehicles)) 
               & (mult_estimates.nutrient.isin(nutrients + ['NA']))].sort_values('vehicle')

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
27,Burkina Faso,maize flour,,Unknown,percent of population eating industrially prod...,0.0,multiplicative
378,Burkina Faso,maize flour,iron,Unknown,percent of population eating fortified vehicle,0.0,multiplicative
379,Burkina Faso,maize flour,zinc,Unknown,percent of population eating fortified vehicle,0.0,multiplicative
380,Burkina Faso,maize flour,folic acid,Unknown,percent of population eating fortified vehicle,0.0,multiplicative
20,Burkina Faso,wheat flour,,Mandatory,percent of population eating industrially prod...,52.3,multiplicative
24,Burkina Faso,wheat flour,,Unknown,percent of population eating industrially prod...,52.3,multiplicative
371,Burkina Faso,wheat flour,iron,Mandatory,percent of population eating fortified vehicle,17.38975,multiplicative
372,Burkina Faso,wheat flour,folic acid,Mandatory,percent of population eating fortified vehicle,32.1645,multiplicative


In [93]:
subset_data['Burkina Faso'] = subset_data['Burkina Faso'].append(
    mult_estimates[(mult_estimates.location_name=="Burkina Faso") 
               & (mult_estimates.vehicle.isin(vehicles)) 
               & (mult_estimates.nutrient.isin(nutrients + ['NA']))].sort_values('vehicle').drop(columns='standard').drop_duplicates()
)

mult_estimates[(mult_estimates.location_name=="Burkina Faso") 
               & (mult_estimates.vehicle.isin(vehicles)) 
               & (mult_estimates.nutrient.isin(nutrients + ['NA']))].sort_values('vehicle').drop(columns='standard').drop_duplicates()

Unnamed: 0,location_name,vehicle,nutrient,value_description,value_mean,estimation_status
27,Burkina Faso,maize flour,,percent of population eating industrially prod...,0.0,multiplicative
378,Burkina Faso,maize flour,iron,percent of population eating fortified vehicle,0.0,multiplicative
379,Burkina Faso,maize flour,zinc,percent of population eating fortified vehicle,0.0,multiplicative
380,Burkina Faso,maize flour,folic acid,percent of population eating fortified vehicle,0.0,multiplicative
20,Burkina Faso,wheat flour,,percent of population eating industrially prod...,52.3,multiplicative
371,Burkina Faso,wheat flour,iron,percent of population eating fortified vehicle,17.38975,multiplicative
372,Burkina Faso,wheat flour,folic acid,percent of population eating fortified vehicle,32.1645,multiplicative


In [94]:
#this estimate makes no sense
pct_ind_prod[(pct_ind_prod.location_name=="Burkina Faso")]

Unnamed: 0,location_name,vehicle,percent_of_population_eating_industrially_produced_vehicle
2,Burkina Faso,oil,51.975594


In [95]:
bf_oil_est = pd.DataFrame({
    'location_name':['Burkina Faso'],
    'value_description':['percent of population eating industrially produced vehicle'],
    'vehicle':['oil'],
    'nutrient':['na'],
    'value_mean':[51.975594],
    'estimation_status':['multiplicative']
})

subset_data['Burkina Faso'] = subset_data['Burkina Faso'].append(
    bf_oil_est
)

bf_oil_est

Unnamed: 0,location_name,value_description,vehicle,nutrient,value_mean,estimation_status
0,Burkina Faso,percent of population eating industrially prod...,oil,na,51.975594,multiplicative


In [96]:
path_maize_estimates = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_fortified_maize_regression_estimates_3_22_2021.csv'
fort_maize = pd.read_csv(path_maize_estimates)

draws = [f'draw_{i}' for i in range(500)]

fort_maize = fort_maize.groupby(['location_name','vehicle']).mean().reset_index()

In [97]:
fort_maize['value_mean'] = fort_maize[draws].mean(axis=1)
fort_maize['value_025_percentile'] = fort_maize[draws].quantile(.025, axis=1)
fort_maize['value_975_percentile'] = fort_maize[draws].quantile(.975, axis=1)

fort_maize = fort_maize[['location_name','vehicle','value_mean']]

In [98]:
fort_maize[(fort_maize.location_name=="Burkina Faso")]


Unnamed: 0,location_name,vehicle,value_mean
1,Burkina Faso,maize flour,0.0


In [99]:
bf_maize_est = pd.DataFrame({
    'location_name':['Burkina Faso'],
    'value_description':['percent of population eating fortified vehicle'],
    'vehicle':['maize flour'],
    'nutrient':['vitamin a'],
    'value_mean':[0],
    'estimation_status':['regression']
})

subset_data['Burkina Faso'] = subset_data['Burkina Faso'].append(
    bf_maize_est
)

bf_maize_est

Unnamed: 0,location_name,value_description,vehicle,nutrient,value_mean,estimation_status
0,Burkina Faso,percent of population eating fortified vehicle,maize flour,vitamin a,0,regression


In [100]:
# path_wheat_estimates = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_fortified_wheat_regression_estimates_3_22_2021.csv'
# fort_wheat = pd.read_csv(path_maize_estimates)

# draws = [f'draw_{i}' for i in range(500)]

# fort_wheat = fort_wheat.groupby(['location_name','vehicle']).mean().reset_index()
# fort_wheat['value_mean'] = fort_wheat[draws].mean(axis=1)
# fort_wheat['value_025_percentile'] = fort_wheat[draws].quantile(.025, axis=1)
# fort_wheat['value_975_percentile'] = fort_wheat[draws].quantile(.975, axis=1)

# fort_wheat = fort_wheat[['location_name','vehicle','value_mean']]

In [101]:
fort_wheat[(fort_wheat.location_name=="Burkina Faso")]

Unnamed: 0,location_name,vehicle,value_mean
2,Burkina Faso,wheat flour,12.388563


In [102]:
n=2

bf_wheat_est = pd.DataFrame({
    'location_name':['Burkina Faso'] * n,
    'value_description':['percent of population eating fortified vehicle']* n,
    'vehicle':['wheat flour']* n,
    'nutrient':['zinc', 'vitamin a'],
    'value_mean':[12.388563]* n,
    'estimation_status':['regression']* n
})

subset_data['Burkina Faso'] = subset_data['Burkina Faso'].append(
    bf_wheat_est
)

bf_wheat_est

Unnamed: 0,location_name,value_description,vehicle,nutrient,value_mean,estimation_status
0,Burkina Faso,percent of population eating fortified vehicle,wheat flour,zinc,12.388563,regression
1,Burkina Faso,percent of population eating fortified vehicle,wheat flour,vitamin a,12.388563,regression


## pop-weight subnationals

In [103]:
df[df.location_name.isin(location_names)].subnational_name.unique()

array(['na', nan, 'Kitui', 'Vihiga', 'Ougadougou', 'Gnagna'], dtype=object)

In [104]:
# 'Kitui', 'Vihiga' are relevant; 'Ougadougou', 'Gnagna' are from burkina faso, where we dont have to do any subnat weighting

In [105]:
loc_metadata = get_locs(location_set_id=35, gbd_round_id=6, decomp_step="step4")

#combine subnational estimates by population-weighting

subnats = loc_metadata[loc_metadata.location_name.isin(['Kitui', 'Vihiga', 'Ougadougou', 'Gnagna'])][['location_id','location_name','parent_id']]

In [106]:
subnats = subnats.rename(columns = {
    'location_id':'subnational_id',
    'location_name':'subnational_name'
})

subnat_pop = get_population(age_group_id=22, 
                     location_id=list(subnats.subnational_id),
                     year_id=2017,
                     sex_id=3,
                     gbd_round_id=6, 
                     decomp_step='step5')

subnats = subnats.merge(subnat_pop, left_on = 'subnational_id', right_on = 'location_id')[['subnational_id','subnational_name','population','parent_id']]

subnats['pop_denom'] = subnats.groupby('parent_id').transform('sum').population

subnats['subnat_pop_weight'] = subnats.population / subnats.pop_denom

In [107]:
subnats

Unnamed: 0,subnational_id,subnational_name,population,parent_id,pop_denom,subnat_pop_weight
0,35634,Kitui,1140323.0,180,1829530.0,0.623289
1,35661,Vihiga,689203.2,180,1829530.0,0.376711


In [108]:
tmp = subset_data['Kenya']

In [109]:
scols = rcols + ['source_link','source_citation','estimation_status','sub_population']

In [110]:
tmp.loc[tmp.source_citation.isna(),'source_citation'] = 'na'
tmp.loc[tmp.source_link.isna(),'source_link'] = 'na'
tmp.loc[tmp.estimation_status.isna(),'estimation_status'] = 'na'

tmp.loc[tmp.value_025_percentile=='na','value_025_percentile'] = np.nan
tmp.loc[tmp.value_975_percentile=='na','value_975_percentile'] = np.nan
tmp.loc[tmp.estimation_status.isna(),'estimation_status'] = 'na'

tmp['is_dupl'] = tmp.duplicated(subset = rcols, keep = False)
tmp = tmp.merge(subnats[['subnational_name','subnat_pop_weight']], how = 'left')

In [111]:
tmp.subnat_pop_weight = tmp.subnat_pop_weight.astype(float)
tmp.value_mean = tmp.value_mean.astype(float)
tmp.value_025_percentile = tmp.value_025_percentile.astype(float)
tmp.value_975_percentile = tmp.value_975_percentile.astype(float)

In [112]:
tmp.loc[tmp.is_dupl,'value_mean'] = tmp.loc[tmp.is_dupl].value_mean * tmp.loc[tmp.is_dupl].subnat_pop_weight
tmp.loc[tmp.is_dupl,'value_025_percentile'] = tmp.value_025_percentile * tmp.subnat_pop_weight
tmp.loc[tmp.is_dupl,'value_975_percentile'] = tmp.value_975_percentile * tmp.subnat_pop_weight

In [113]:
tmp.loc[~tmp.is_dupl,'subnational_name'] = 'na'

In [114]:
scols

['location_name',
 'vehicle',
 'value_description',
 'nutrient',
 'source_link',
 'source_citation',
 'estimation_status',
 'sub_population']

In [115]:
tmp_a = tmp[~tmp.is_dupl]

In [116]:
tmp_b = tmp[tmp.is_dupl]

In [117]:
tmp_b.data_choice_notes.unique()

array(['Excluded central bureau of stats total pop number (85%) from 2005 in lieu of Ferguson rural u5 number from 2015. Note 72% of kenyan population is rural according to the world bank: https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=KE',
       'Excluded central bureau of stats total pop number (16%) from 2005 in lieu of Ferguson rural u5 number from 2015. Note 72% of kenyan population is rural according to the world bank: https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=KE'],
      dtype=object)

In [118]:
rcols

['location_name', 'vehicle', 'value_description', 'nutrient']

In [119]:
tmp_b.value_mean = tmp_b.groupby(scols).transform('sum').value_mean
tmp_b.value_025_percentile = tmp_b.groupby(scols).transform('sum').value_025_percentile
tmp_b.value_975_percentile = tmp_b.groupby(scols).transform('sum').value_975_percentile

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [120]:
tmp_b = tmp_b[['location_name','vehicle','value_description','nutrient','estimation_status','source_link','source_citation','source_year','sub_population','value_mean','value_025_percentile','value_975_percentile','data_choice_notes',]].drop_duplicates()

In [121]:
tmp_b.value_mean = tmp_b.value_mean.astype(float)
tmp_b.value_025_percentile = tmp_b.value_025_percentile.astype(float)
tmp_b.value_975_percentile = tmp_b.value_975_percentile.astype(float)

In [122]:
tmp_b = tmp_b.groupby(['location_name','vehicle','value_description','nutrient','source_link','source_citation','source_year',]).mean().reset_index()

In [123]:
tmp_b['data_choice_notes'] = "Excluded central bureau of stats total pop number from 2005 in lieu of Ferguson rural u5 number from 2015. Note 72% of kenyan population is rural according to the world bank: https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=KE'. Population weighted rural subnationals."

In [124]:
tmp = tmp_b.append(tmp_a)

In [125]:
tmp

Unnamed: 0,data_choice_notes,estimation_status,included,inclusion_justification,is_dupl,location_id,location_name,notes,nutrient,source_citation,...,standard,sub_population,subnat_pop_weight,subnational_name,urbanicity,value_025_percentile,value_975_percentile,value_description,value_mean,vehicle
0,Excluded central bureau of stats total pop num...,,,,,,Kenya,,na,"Ferguson, Elaine et al. “Zinc, iron and calciu...",...,,,,,,0.0,0.0,percent of population eating vehicle,91.67123,maize flour
1,Excluded central bureau of stats total pop num...,,,,,,Kenya,,na,"Ferguson, Elaine et al. “Zinc, iron and calciu...",...,,,,,,0.0,0.0,percent of population eating vehicle,56.652927,oil
0,Only one source.,na,,,False,,Kenya,nationally representative survey: proportion o...,na,"Central Bureau of Statistics (Kenya), UK Depar...",...,,total population,,na,mixed/both,,,percent of population eating industrially prod...,36.67,maize flour
12,Only one source.,na,,,False,,Kenya,nationally representative survey: proportion o...,na,"Central Bureau of Statistics (Kenya), UK Depar...",...,,total population,,na,mixed/both,,,percent of population eating industrially prod...,9.39,oil
25,Only one source.,na,,,False,,Kenya,nationally representative survey: proportion o...,na,"Central Bureau of Statistics (Kenya), UK Depar...",...,,total population,,na,mixed/both,,,percent of population eating industrially prod...,30.95,wheat flour
26,Only one source.,na,,,False,,Kenya,nationally representative survey: proportion o...,na,"Central Bureau of Statistics (Kenya), UK Depar...",...,,total population,,na,mixed/both,,,percent of population eating vehicle,36.58,wheat flour
27,,multiplicative,,,False,,Kenya,,iron,na,...,Mandatory,,,na,,,,percent of population eating fortified vehicle,33.83575,wheat flour
28,,multiplicative,,,False,,Kenya,,zinc,na,...,Mandatory,,,na,,,,percent of population eating fortified vehicle,34.703333,wheat flour
29,,multiplicative,,,False,,Kenya,,folic acid,na,...,Mandatory,,,na,,,,percent of population eating fortified vehicle,34.703333,wheat flour
30,,multiplicative,,,False,,Kenya,,vitamin a,na,...,Mandatory,,,na,,,,percent of population eating fortified vehicle,34.703333,wheat flour


In [126]:
subset_data['Kenya'] = tmp

## Check for missingness

In [127]:
all_data = pd.concat(list(subset_data.values()))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [128]:
all_data.loc[(all_data.location_name=="Kenya") & (all_data.vehicle=="oil"),["value_description",'value_mean','source_citation',"data_choice_notes"]]

Unnamed: 0,value_description,value_mean,source_citation,data_choice_notes
1,percent of population eating vehicle,56.6529,"Ferguson, Elaine et al. “Zinc, iron and calciu...",Excluded central bureau of stats total pop num...
12,percent of population eating industrially prod...,9.39,"Central Bureau of Statistics (Kenya), UK Depar...",Only one source.
35,percent of population eating fortified vehicle,0.0,na,


In [129]:
all_data.data_choice_notes.unique()

array(["Excluded central bureau of stats total pop number from 2005 in lieu of Ferguson rural u5 number from 2015. Note 72% of kenyan population is rural according to the world bank: https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=KE'. Population weighted rural subnationals.",
       'Only one source.', nan,
       'We had one urban source, and one rural. We weighted these by the percentage of Burkina Faso that is rural vs urban, using worldbank numbers (70% rural): https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=BF)',
       'Out of two numbers from the same source, discarded 2010 number (70%) in lieu of 2013 number (75%)',
       'discarded 2003-2007 Becquey numbers for total pop (53%) and women (16-30%) in lieu of 2018 u5 survey (92%)',
       'Only one source; discarding the urban- and rural- specific estimates, keeping the total estimates for WRA and u5',
       'Unable to find data; assuming 0; justification: lack of reports on fortification activities

In [130]:
all_data.loc[~(all_data.nutrient.isin(['vitamin a','iron','zinc','folic acid'])),'nutrient'] = 'na'

In [131]:
all_data[rcols + ['value_mean','value_025_percentile','value_975_percentile','sub_population']]

Unnamed: 0,location_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population
0,Kenya,maize flour,percent of population eating vehicle,na,91.6712,0,0,
1,Kenya,oil,percent of population eating vehicle,na,56.6529,0,0,
0,Kenya,maize flour,percent of population eating industrially prod...,na,36.67,,,total population
12,Kenya,oil,percent of population eating industrially prod...,na,9.39,,,total population
25,Kenya,wheat flour,percent of population eating industrially prod...,na,30.95,,,total population
26,Kenya,wheat flour,percent of population eating vehicle,na,36.58,,,total population
27,Kenya,wheat flour,percent of population eating fortified vehicle,iron,33.8358,,,
28,Kenya,wheat flour,percent of population eating fortified vehicle,zinc,34.7033,,,
29,Kenya,wheat flour,percent of population eating fortified vehicle,folic acid,34.7033,,,
30,Kenya,wheat flour,percent of population eating fortified vehicle,vitamin a,34.7033,,,


In [132]:
check = target.merge(all_data[rcols + ['value_mean']], on = rcols, how = 'left')

In [133]:
check[check.value_mean.isna()]

Unnamed: 0,location_name,vehicle,value_description,nutrient,value_mean


In [134]:
assert(len(check[check.value_mean.isna()])==0), "there are target loc/vehcile/val/nutrient combos youre missing"

In [135]:
rcols

['location_name', 'vehicle', 'value_description', 'nutrient']

In [136]:
output = all_data[rcols + ['value_mean','value_025_percentile', 'value_975_percentile','sub_population','estimation_status','source_citation','source_link','data_choice_notes']].sort_values(rcols).set_index(rcols)

In [137]:
##impute all missing CIs

# clean value_mean
output.loc[output.value_mean=='na','value_mean'] = np.nan
output.value_mean = output.value_mean.astype(float)

# clean 2.5th %ile
output.loc[output.value_025_percentile=='na','value_025_percentile'] = np.nan
output.value_025_percentile = output.value_025_percentile.astype(float)

# clean 97.5th %ile
output.loc[output.value_975_percentile=='na','value_975_percentile'] = np.nan
output.value_975_percentile = output.value_975_percentile.astype(float)

In [138]:
output.loc[(output.value_mean > output.value_975_percentile),'value_975_percentile'] = np.nan

In [139]:
output.loc[(output.value_mean < output.value_025_percentile)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes
location_name,vehicle,value_description,nutrient,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [140]:
output.loc[(output.value_mean == output.value_025_percentile)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes
location_name,vehicle,value_description,nutrient,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Myanmar,oil,percent of population eating fortified vehicle,vitamin a,0.0,0.0,5.0,total population,,,,Unable to find data; assuming 0; justification...
Myanmar,wheat flour,percent of population eating fortified vehicle,folic acid,0.0,0.0,5.0,total population,,,,Unable to find data; assuming 0; justification...
Myanmar,wheat flour,percent of population eating fortified vehicle,iron,0.0,0.0,5.0,total population,,,,Unable to find data; assuming 0; justification...
Myanmar,wheat flour,percent of population eating fortified vehicle,zinc,0.0,0.0,5.0,total population,,,,Unable to find data; assuming 0; justification...
Nepal,oil,percent of population eating vehicle,na,99.8,99.8,99.9,total population,,GHDx,http://internal-ghdx.healthdata.org/record/nep...,"Keeping overall estimate (99.8%), discarding e..."


In [141]:
output['scale_over_mean'] = (output.value_975_percentile - output.value_025_percentile) / output.value_mean

In [142]:
output = output.reset_index()

In [143]:
r = output.loc[(output.scale_over_mean!=np.inf),['vehicle','scale_over_mean']]
# .groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'}).reset_index()

In [144]:
r_mean = r.scale_over_mean.mean()

In [145]:
r.loc[r.vehicle=="maize flour",'scale_over_mean'] = r_mean

In [146]:
r = r.groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'}).reset_index()

In [147]:
# add uncertainty
output = output.merge(r, on = 'vehicle', how = 'outer')
output['lower'] = np.clip(output.value_mean - (output.r * output.value_mean)/2, 0, 100)
output['upper'] = np.clip(output.value_mean + (output.r * output.value_mean)/2, 0, 100)

In [148]:
output

Unnamed: 0,location_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes,scale_over_mean,r,lower,upper
0,Burkina Faso,maize flour,percent of population eating fortified vehicle,folic acid,0.0,,,,multiplicative,,,,,1.060711,0.0,0.0
1,Burkina Faso,maize flour,percent of population eating fortified vehicle,iron,0.0,,,,multiplicative,,,,,1.060711,0.0,0.0
2,Burkina Faso,maize flour,percent of population eating fortified vehicle,vitamin a,0.0,,,,regression,,,,,1.060711,0.0,0.0
3,Burkina Faso,maize flour,percent of population eating fortified vehicle,zinc,0.0,,,,multiplicative,,,,,1.060711,0.0,0.0
4,Burkina Faso,maize flour,percent of population eating industrially prod...,na,0.0,,,,multiplicative,,,,,1.060711,0.0,0.0
5,Burkina Faso,maize flour,percent of population eating vehicle,na,26.61,,,,,"Savy, M., Martin-Prével, Y., Traissac, P., Eym...",https://academic.oup.com/jn/article-abstract/1...,"We had one urban source, and one rural. We wei...",,1.060711,12.497238,40.722762
6,Kenya,maize flour,percent of population eating fortified vehicle,folic acid,0.0,,,,multiplicative,na,na,,,1.060711,0.0,0.0
7,Kenya,maize flour,percent of population eating fortified vehicle,iron,5.317153,,,,multiplicative,na,na,,,1.060711,2.497171,8.137134
8,Kenya,maize flour,percent of population eating fortified vehicle,vitamin a,0.0,,,,multiplicative,na,na,,,1.060711,0.0,0.0
9,Kenya,maize flour,percent of population eating fortified vehicle,zinc,0.0,,,,multiplicative,na,na,,,1.060711,0.0,0.0


In [149]:
output.loc[(output.value_mean < output.value_025_percentile) | (output.value_025_percentile.isna()),'CI_source'] = "modeling"
output.loc[(output.value_mean > output.value_975_percentile) | (output.value_975_percentile.isna()),'CI_source'] = "modeling"

output.loc[output.CI_source.isna(),'CI_source'] = 'extraction'

output.loc[(output.value_mean < output.value_025_percentile) | (output.value_025_percentile.isna()),'value_025_percentile'] = output.loc[(output.value_mean < output.value_025_percentile) | (output.value_025_percentile.isna())].lower
output.loc[(output.value_mean > output.value_975_percentile) | (output.value_975_percentile.isna()),'value_975_percentile'] = output.loc[(output.value_mean > output.value_975_percentile) | (output.value_975_percentile.isna())].upper

output = output.drop(columns=['r','lower','upper','scale_over_mean']).set_index(rcols)

output.loc[output.estimation_status.isna(),'estimation_status'] = 'na'

  raw_cell, store_history, silent, shell_futures)


In [150]:
output

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes,CI_source
location_name,vehicle,value_description,nutrient,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Burkina Faso,maize flour,percent of population eating fortified vehicle,folic acid,0.0,0.0,0.0,,multiplicative,,,,modeling
Burkina Faso,maize flour,percent of population eating fortified vehicle,iron,0.0,0.0,0.0,,multiplicative,,,,modeling
Burkina Faso,maize flour,percent of population eating fortified vehicle,vitamin a,0.0,0.0,0.0,,regression,,,,modeling
Burkina Faso,maize flour,percent of population eating fortified vehicle,zinc,0.0,0.0,0.0,,multiplicative,,,,modeling
Burkina Faso,maize flour,percent of population eating industrially produced vehicle,na,0.0,0.0,0.0,,multiplicative,,,,modeling
Burkina Faso,maize flour,percent of population eating vehicle,na,26.61,12.497238,40.722762,,na,"Savy, M., Martin-Prével, Y., Traissac, P., Eym...",https://academic.oup.com/jn/article-abstract/1...,"We had one urban source, and one rural. We wei...",modeling
Kenya,maize flour,percent of population eating fortified vehicle,folic acid,0.0,0.0,0.0,,multiplicative,na,na,,modeling
Kenya,maize flour,percent of population eating fortified vehicle,iron,5.317153,2.497171,8.137134,,multiplicative,na,na,,modeling
Kenya,maize flour,percent of population eating fortified vehicle,vitamin a,0.0,0.0,0.0,,multiplicative,na,na,,modeling
Kenya,maize flour,percent of population eating fortified vehicle,zinc,0.0,0.0,0.0,,multiplicative,na,na,,modeling


In [151]:
output = output.reset_index()

In [152]:
output.loc[(output.location_name=="Vietnam"),'location_name'] = "Viet Nam"

In [153]:
sort_helper = {
    'percent of population eating fortified vehicle': 'C',
 'percent of population eating industrially produced vehicle': 'B',
 'percent of population eating vehicle': 'A'}

In [154]:
output['sort_helper'] = output.value_description.map(sort_helper)

In [155]:
output = output.sort_values(['location_name','vehicle','sort_helper','value_description','nutrient']).drop(columns='sort_helper')

In [156]:
save_path_tmp = '/ihme/homes/beatrixh/repos/scratch/tier3_coverage_data_03_23_2021.csv'
output.to_csv(save_path_tmp, index = False)

In [158]:
save_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/population_coverage_data_tier3_locs_3_23_2021.csv'
output.to_csv(save_path, index = False)

In [159]:
output.head()

Unnamed: 0,location_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes,CI_source
5,Burkina Faso,maize flour,percent of population eating vehicle,na,26.61,12.497238,40.722762,,na,"Savy, M., Martin-Prével, Y., Traissac, P., Eym...",https://academic.oup.com/jn/article-abstract/1...,"We had one urban source, and one rural. We wei...",modeling
4,Burkina Faso,maize flour,percent of population eating industrially prod...,na,0.0,0.0,0.0,,multiplicative,,,,modeling
0,Burkina Faso,maize flour,percent of population eating fortified vehicle,folic acid,0.0,0.0,0.0,,multiplicative,,,,modeling
1,Burkina Faso,maize flour,percent of population eating fortified vehicle,iron,0.0,0.0,0.0,,multiplicative,,,,modeling
2,Burkina Faso,maize flour,percent of population eating fortified vehicle,vitamin a,0.0,0.0,0.0,,regression,,,,modeling


In [176]:
formatted_output = output.copy()

In [177]:
loc_metadata = get_locs(location_set_id=35, gbd_round_id=6, decomp_step="step4")

loc_ids = loc_metadata.loc[(loc_metadata.location_name.isin(formatted_output.location_name.unique())),['location_id','location_name']]

formatted_output = formatted_output.merge(loc_ids, on = 'location_name', how = 'left')

In [181]:
formatted_usecols = ['location_id','location_name','sub_population',
                     'vehicle','value_description','nutrient','value_mean',
                     'value_025_percentile','value_975_percentile']

formatted_output = formatted_output[formatted_usecols]

In [182]:
save_formatted_output_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/population_coverage_tier3_input_data.csv'
formatted_output.to_csv(save_formatted_output_path, index = False)

In [183]:
prev_tiers_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/lsff_input_coverage_data.csv'
prev_tiers = pd.read_csv(prev_tiers_path)

In [186]:
all_tiers = prev_tiers.append(formatted_output)

In [187]:
# all_tiers.to_csv(prev_tiers_path, index = False)

In [None]:
## TODO 
# check whats missing
# pop weight as necessary
# rerun the regressions --- make sure to fix burkina faso
# incorporate and format 