In [1]:
from db_queries import get_population, get_ids
from db_queries import get_location_metadata as get_locs

In [2]:
import pandas as pd, numpy as np

# LSFF: choose population coverage data by hand for tier 2 countries

## vehicles: Wheat flour, maize flour, oil

## countries: Bangladesh, Pakistan, United Republic of Tanzania, Uganda, South Africa

In [3]:
# Nutrients of interest; used to restrict the fortified-coverage targets and filters.
nutrients = [
    'iron',
    'zinc',
    'folic acid',
    'vitamin a',
]

In [4]:
# Input CSVs: the LSFF extraction sheet and the assumed-coverage sheet.
# NOTE(review): these are absolute paths under one user's home directory, so the
# notebook only runs where that directory is mounted -- consider a configurable base dir.
data_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/extraction_sheet_lsff_03_16_2021.csv'
assm_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/extraction_sheet_lsff_assumed_coverage_03_11_2021.csv'

# Extraction sheet; every per-country selection below is filtered from this frame.
df = pd.read_csv(data_path)

In [5]:
# Collapse the long-form label onto the short 'folic acid' name used in `nutrients`.
df.loc[df.nutrient=="folic acid, folate, b9",'nutrient']= 'folic acid'

In [6]:
# Assumed-coverage sheet; per the saved output below it only covers Ethiopia, Myanmar and India.
assum = pd.read_csv(assm_path)

In [7]:
# don't need to deal with this for tier2
assum.location_name.unique()

array(['Ethiopia', 'Myanmar', 'India'], dtype=object)

In [8]:
# Extracted rows get 'na' here; rows taken from mult_estimates are labelled 'multiplicative' instead.
df['estimation_status'] = 'na'

In [9]:
# Empty by default; filled per selection below with the reason a source was chosen.
df['data_choice_notes'] = ""

In [10]:
# Previously computed estimates (columns A, B, C plus B_estimate / C_estimate flags,
# as shown in the saved output of the next cell).
# NOTE(review): absolute path under one user's home directory.
mult_estimates_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/lsff_data_estimated_03_11_2021.csv'

mult_estimates = pd.read_csv(mult_estimates_path)

In [11]:
mult_estimates[mult_estimates.location_name=="Pakistan"]

Unnamed: 0,location_name,vehicle,nutrient,standard,A,B,B_estimate,C,C_estimate
255,Pakistan,wheat flour,iron,Mandatory in Punjab only,97.333333,48.666667,0.0,5.333333,0.0
256,Pakistan,wheat flour,folic acid,Mandatory in Punjab only,97.333333,48.666667,0.0,1.946667,1.0
257,Pakistan,wheat flour,zinc,Unknown,97.333333,48.666667,0.0,1.946667,1.0
258,Pakistan,wheat flour,vitamin b12,Unknown,97.333333,48.666667,0.0,1.946667,1.0
259,Pakistan,wheat flour,vitamin b1,Unknown,97.333333,48.666667,0.0,0.0,1.0
260,Pakistan,oil,vitamin a,Mandatory,100.0,95.5,0.0,30.0,0.0
261,Pakistan,oil,vitamin d,Mandatory,100.0,95.5,0.0,,
348,Pakistan,wheat flour,vitamin a,,97.333333,48.666667,0.0,0.0,1.0


In [12]:
# Reshape mult_estimates from wide (B, C columns) to long (value_description, value_mean),
# keeping only values flagged as estimated.
# NOTE: this cell overwrites mult_estimates in place and is not re-runnable -- after the
# melt the B / C / *_estimate columns no longer exist.

# Blank out B and C wherever the matching *_estimate flag is not 1.0.
mult_estimates.loc[(mult_estimates.B_estimate!=1.0),'B'] = np.nan
mult_estimates.loc[(mult_estimates.C_estimate!=1.0),'C'] = np.nan

# NOTE(review): .dropna() has no subset, so it also drops rows whose `standard` is NaN
# (e.g. the Pakistan / wheat flour / vitamin a row in the saved output above), not just
# rows with a blanked value -- confirm this is intended.
mult_estimates = pd.melt(mult_estimates,
                         id_vars = ['location_name','vehicle','nutrient','standard'],
                         value_vars = ['B','C'], var_name = 'value_description', value_name = 'value_mean').dropna()

# B does not vary by nutrient, so the nutrient is replaced by a placeholder and the
# resulting duplicate rows are dropped.
# NOTE(review): the placeholder here is 'NA' while the target table built later uses 'na';
# the two will not match in an exact merge on nutrient.
mult_estimates.loc[(mult_estimates.value_description=="B"),'nutrient'] = 'NA'

mult_estimates = mult_estimates.drop_duplicates()

# Replace the short column codes with the value_description labels used in the extraction sheet.
mult_estimates.value_description = mult_estimates.value_description.map({
    'B':'percent of population eating industrially produced vehicle',
    'C':'percent of population eating fortified vehicle'
})

In [13]:
# Tag every row from the estimates file so it can be told apart from extracted data.
mult_estimates['estimation_status'] = 'multiplicative'

In [14]:
## load legal combos
# Three pickled lookups: vehicle -> nutrients, country -> vehicles, vehicle -> countries
# (as suggested by the file names; the first two are indexed that way below).
# NOTE(review): pickle.load executes arbitrary code from the file -- only load trusted files.
# NOTE(review): this import sits mid-notebook; the other imports are in the first cells.
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'

with open(data_prep_dir + 'lsff_vehicle_nutrient_pairs.pickle', 'rb') as handle:
    vehicle_nutrient_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_country_vehicle_pairs.pickle', 'rb') as handle:
    country_vehicle_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_vehicle_country_pairs.pickle', 'rb') as handle:
    vehicle_country_map = pickle.load(handle)

In [15]:
# Tier 2 countries handled in this notebook.
location_names = [
    'Bangladesh',
    'Pakistan',
    'United Republic of Tanzania',
    'Uganda',
    'South Africa',
]

In [16]:
# Every (country, vehicle) pair that needs coverage values.
# NOTE(review): the saved output of this cell lists no rows for
# 'United Republic of Tanzania' -- confirm country_vehicle_map has vehicles for it.
country_vehicle_pairs = [
    (loc, v)
    for loc in location_names
    for v in country_vehicle_map[loc]
]

target_high_level = (
    pd.DataFrame(country_vehicle_pairs, columns=['location_name', 'vehicle'])
    .sort_values(['location_name', 'vehicle'])
    .set_index(['location_name', 'vehicle'])
)

target_high_level

location_name,vehicle
Bangladesh,oil
Bangladesh,wheat flour
Pakistan,oil
Pakistan,wheat flour
South Africa,maize flour
South Africa,oil
South Africa,wheat flour
Uganda,maize flour
Uganda,oil
Uganda,wheat flour


In [17]:
# Vehicles whose nutrient list includes iron.
target_vehicles = [i for i in vehicle_nutrient_map.keys() if 'iron' in vehicle_nutrient_map[i]]

# Nutrient-specific targets: one "fortified" row per country / vehicle / nutrient,
# restricted to the nutrients of interest.
target_a = pd.DataFrame([(loc,vehicle,nutrient,'percent of population eating fortified vehicle') for loc in location_names
                       for vehicle in country_vehicle_map[loc]
                      for nutrient in vehicle_nutrient_map[vehicle]],
            columns=['location_name','vehicle','nutrient','value_description']).sort_values(['location_name','vehicle','nutrient'])
target_a = target_a[(target_a.nutrient.isin(nutrients))]

# Nutrient-independent targets: "industrially produced" and "eating vehicle" rows,
# with 'na' as the nutrient placeholder.
target_b = pd.DataFrame([(loc,vehicle,'na',val) for loc in location_names
                       for vehicle in country_vehicle_map[loc]
                      for val in ['percent of population eating industrially produced vehicle',
       'percent of population eating vehicle']],
            columns=['location_name','vehicle','nutrient','value_description'])

sortvars = ['location_name','vehicle','value_description','nutrient']
# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
target = pd.concat([target_a, target_b]).sort_values(sortvars).set_index(sortvars)

In [None]:
# Move the index levels back into columns; rcols is the key-column list reused in the
# per-country checks below.
# NOTE: re-running this cell calls reset_index again and adds an 'index' column to target.
target = target.reset_index()
rcols = target.columns.tolist()

In [None]:
# Columns shown when reviewing candidate rows for one country / vehicle / value type.
check_cols = ['location_id','location_name','urbanicity','subnational_name','vehicle','value_description','nutrient','value_mean','value_025_percentile',
       'value_975_percentile','sub_population','source_year','notes','source_citation','source_link','data_choice_notes']

def filter_data(country, vehicle, val, data=None):
    """Return the rows for one country / vehicle / value_description that have a value_mean.

    Parameters
    ----------
    country : str
        Value to match against ``location_name``.
    vehicle : str
        Value to match against ``vehicle``.
    val : str
        Value to match against ``value_description``.
    data : pandas.DataFrame, optional
        Frame to filter. Defaults to the notebook-level extraction sheet ``df``,
        which keeps existing three-argument calls working unchanged.

    Returns
    -------
    pandas.DataFrame
        Matching rows restricted to ``check_cols``, with the original index preserved.
    """
    if data is None:
        # Fall back to the notebook-level frame the original implementation always used.
        data = df

    keep = (
        (data.location_name == country)
        & (data.vehicle == vehicle)
        & (data.value_description == val)
        & (data.value_mean.notna())
    )
    return data.loc[keep, check_cols]

In [None]:
def check_one_country(country):
    """Collect every candidate row for `country` across all vehicles and value types.

    Calls filter_data once per (vehicle, value_description) pair, vehicles in the
    outer loop, and concatenates the results in that order.
    """
    vehicles = ['oil', 'wheat flour', 'salt', 'maize flour', 'rice', 'bouillon']
    values_gold = ['percent of population eating fortified vehicle',
               'percent of population eating industrially produced vehicle',
               'percent of population eating vehicle']

    pieces = []
    for vehicle in vehicles:
        for val in values_gold:
            pieces.append(filter_data(country, vehicle, val))
    return pd.concat(pieces)

In [None]:
# Column subset for the selected data (not referenced in the visible part of the notebook).
usecols = ['location_id','location_name','subnational_name','vehicle','value_description','nutrient','value_mean', 'value_025_percentile',
       'value_975_percentile']
# Chosen rows per country, keyed by location name; filled in by the sections below.
subset_data = {}

In [None]:
# Start every country with an empty frame. Re-running this cell discards any rows
# already selected below.
for i in location_names:
    subset_data[i] = pd.DataFrame()

## Bangladesh

In [None]:
bangladesh = check_one_country('Bangladesh')

In [None]:
bangladesh.value_description.unique()

In [None]:
mult_estimates[(mult_estimates.location_name=="Bangladesh")]

In [None]:
## pct of pop eating industrially produced oil

In [None]:
# pct of pop eating industrially produced oil
bangladesh[(bangladesh.vehicle=="oil") & (bangladesh.value_description=="percent of population eating industrially produced vehicle")]

In [None]:
# Record why the mixed/both-urbanicity row was chosen ("descarding" typo fixed in the note).
bangladesh_oil_industrial_sel = (
    (bangladesh.vehicle == "oil")
    & (bangladesh.value_description == "percent of population eating industrially produced vehicle")
    & (bangladesh.urbanicity == "mixed/both")
)
bangladesh.loc[bangladesh_oil_industrial_sel, 'data_choice_notes'] = "Keeping Aaron number (88%) and discarding Raghavan number (87%), as they are very similar, from the same year, and the Aaron number has uncertainty"

In [None]:
# keep the row with uncertainty, as values are very similar and from the same year
bangladesh_oil_industrial_rows = bangladesh[
    (bangladesh.vehicle == "oil")
    & (bangladesh.value_description == "percent of population eating industrially produced vehicle")
    & (bangladesh.urbanicity == "mixed/both")
]

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Bangladesh'] = pd.concat([subset_data['Bangladesh'], bangladesh_oil_industrial_rows])

In [None]:
bangladesh[(bangladesh.vehicle=="oil") & (bangladesh.value_description=="percent of population eating vehicle")]

In [None]:
# percent of pop eating oil: keep the Aaron et al. survey rows and record why.
# The note now spells the discarded source "Leyvraz", matching its citation.
bangladesh_oil_eating_sel = (
    (bangladesh.vehicle == "oil")
    & (bangladesh.value_description == "percent of population eating vehicle")
    & (bangladesh.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245753")
)
bangladesh.loc[bangladesh_oil_eating_sel, 'data_choice_notes'] = "Keeping Aaron (survey data) number from 2015 (100%), discarding Fiedler number (51-90%) from 2005, and discarding Leyvraz number from 2011 (99.8%)"

In [None]:
# Add the Aaron et al. "percent of population eating vehicle" rows for oil.
bangladesh_oil_eating_rows = bangladesh[
    (bangladesh.vehicle == "oil")
    & (bangladesh.value_description == "percent of population eating vehicle")
    & (bangladesh.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245753")
]

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Bangladesh'] = pd.concat([subset_data['Bangladesh'], bangladesh_oil_eating_rows])

In [None]:
# percent of pop eating wheat flour
bangladesh[(bangladesh.vehicle=="wheat flour") & (bangladesh.value_description=="percent of population eating vehicle")]

In [None]:
## keep the more recent, u5- and WRA-specific values
# Note text corrected: "Leyvraz" (as in the citation), "specific", "all-population".
bangladesh_wheat_eating_sel = (
    (bangladesh.vehicle == "wheat flour")
    & (bangladesh.value_description == "percent of population eating vehicle")
    & (bangladesh.source_citation == "Leyvraz M, Laillou A, Rahman S, et al. An Assessment of the Potential Impact of Fortification of Staples and Condiments on Micronutrient Intake of Young Children and Women of Reproductive Age in Bangladesh. Nutrients 2016; 8: 541.")
)
bangladesh.loc[bangladesh_wheat_eating_sel, "data_choice_notes"] = "Keeping Leyvraz numbers (42% and 76%), as they are u5- and WRA- specific and more recent (2011). Discarding 2005 Fiedler all-population numbers"

In [None]:
# Add the Leyvraz et al. "percent of population eating vehicle" rows for wheat flour.
bangladesh_wheat_eating_rows = bangladesh[
    (bangladesh.vehicle == "wheat flour")
    & (bangladesh.value_description == "percent of population eating vehicle")
    & (bangladesh.source_citation == "Leyvraz M, Laillou A, Rahman S, et al. An Assessment of the Potential Impact of Fortification of Staples and Condiments on Micronutrient Intake of Young Children and Women of Reproductive Age in Bangladesh. Nutrients 2016; 8: 541.")
]

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Bangladesh'] = pd.concat([subset_data['Bangladesh'], bangladesh_wheat_eating_rows])

In [None]:
# ## need to use estimates for
# - pct eating fortified oil
# - pct eating fortifiable wheat
# - pct eating fortified wheat

In [None]:
# No extracted value for fortified oil: use the multiplicative estimate for vitamin A.
bangladesh_oil_estimates = mult_estimates[
    (mult_estimates.location_name == "Bangladesh")
    & (mult_estimates.vehicle == "oil")
    & (mult_estimates.nutrient == "vitamin a")
]

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Bangladesh'] = pd.concat([subset_data['Bangladesh'], bangladesh_oil_estimates])

bangladesh_oil_estimates

In [None]:
# Wheat flour estimates: the nutrient-independent row ('NA') plus the nutrients of interest.
bangladesh_wheat_estimates = mult_estimates[
    (mult_estimates.location_name == "Bangladesh")
    & (mult_estimates.vehicle == "wheat flour")
    & (mult_estimates.nutrient.isin(['NA','iron','zinc','folic acid','vitamin a']))
]

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Bangladesh'] = pd.concat([subset_data['Bangladesh'], bangladesh_wheat_estimates])

bangladesh_wheat_estimates

In [None]:
subset_data['Bangladesh'][rcols + ['value_mean']].sort_values(rcols)

In [None]:
# Coverage check: every target row for Bangladesh should pick up a value_mean.
bangladesh_selected = subset_data['Bangladesh'][rcols + ['value_mean']].copy()

# Rows taken from mult_estimates use 'NA' as the nutrient placeholder while target uses
# 'na'; normalise so estimated "industrially produced" rows are not reported as missing.
bangladesh_selected['nutrient'] = bangladesh_selected['nutrient'].replace('NA', 'na')

target[target.location_name=="Bangladesh"].merge(
    bangladesh_selected,
    on = rcols,
    how = 'left'
)

## Pakistan

In [None]:
pakistan = check_one_country('Pakistan')

In [None]:
pakistan.value_description.unique()

In [None]:
# percent of pop eating ind prod oil
pakistan[(pakistan.vehicle=="oil") &
         (pakistan.value_description=="percent of population eating industrially produced vehicle")]

In [None]:
# Use the source that refers to oil alone rather than oil/ghee.
pakistan_oil_industrial_sel = (
    (pakistan.vehicle == "oil")
    & (pakistan.value_description == "percent of population eating industrially produced vehicle")
    & (pakistan.source_citation == "GFDx")
)
pakistan.loc[pakistan_oil_industrial_sel, "data_choice_notes"] = "Kept GFDx number (85%), discarded FACT survey number (99%), which asked about both oil and ghee"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Pakistan'] = pd.concat([subset_data['Pakistan'], pakistan[pakistan_oil_industrial_sel]])



In [None]:
# percent of pop eating oil
pakistan[(pakistan.vehicle=="oil") &
         (pakistan.value_description=="percent of population eating vehicle")]

In [None]:
## these are all the same; take one (the Balochistan row)
pakistan_oil_eating_sel = (
    (pakistan.vehicle == "oil")
    & (pakistan.value_description == "percent of population eating vehicle")
    & (pakistan.subnational_name == "Balochistan")
)
pakistan.loc[pakistan_oil_eating_sel, "data_choice_notes"] = "Only one source"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Pakistan'] = pd.concat([subset_data['Pakistan'], pakistan[pakistan_oil_eating_sel]])



In [None]:
df.loc[(df.location_name=="Pakistan") & 
  (df.vehicle=="oil") & (df.value_description=="percent of population eating fortified vehicle"),check_cols]

In [None]:
# same source; take all
pakistan_oil_fortified_sel = (
    (pakistan.vehicle == "oil")
    & (pakistan.value_description == "percent of population eating fortified vehicle")
)
pakistan.loc[pakistan_oil_fortified_sel, 'data_choice_notes'] = "Only one source"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Pakistan'] = pd.concat([subset_data['Pakistan'], pakistan.loc[pakistan_oil_fortified_sel]])



In [None]:
# percent of pop eating ind prod wheat
pakistan[(pakistan.vehicle=="wheat flour") &
         (pakistan.value_description=="percent of population eating industrially produced vehicle")]

In [None]:
# only one source, take it
pakistan_wheat_industrial_sel = (
    (pakistan.vehicle == "wheat flour")
    & (pakistan.value_description == "percent of population eating industrially produced vehicle")
)
pakistan.loc[pakistan_wheat_industrial_sel, "data_choice_notes"] = "Only one source"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Pakistan'] = pd.concat([subset_data['Pakistan'], pakistan[pakistan_wheat_industrial_sel]])



In [None]:
# percent of pop eating wheat
pakistan[(pakistan.vehicle=="wheat flour") &
         (pakistan.value_description=="percent of population eating vehicle")]

In [None]:
# these are all the same; take one (the Balochistan row)
pakistan_wheat_eating_sel = (
    (pakistan.vehicle == "wheat flour")
    & (pakistan.value_description == "percent of population eating vehicle")
    & (pakistan.subnational_name == "Balochistan")
)
pakistan.loc[pakistan_wheat_eating_sel, "data_choice_notes"] = "Only one source"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Pakistan'] = pd.concat([subset_data['Pakistan'], pakistan[pakistan_wheat_eating_sel]])

pakistan[pakistan_wheat_eating_sel]

In [None]:
# percent of pop eating fortified wheat
# same source; take all
pakistan_wheat_fortified_sel = (
    (pakistan.vehicle == "wheat flour")
    & (pakistan.value_description == "percent of population eating fortified vehicle")
)
pakistan.loc[pakistan_wheat_fortified_sel, "data_choice_notes"] = "Only one source"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['Pakistan'] = pd.concat([subset_data['Pakistan'], pakistan.loc[pakistan_wheat_fortified_sel]])

pakistan.loc[pakistan_wheat_fortified_sel]

In [None]:
# Multiplicative wheat-flour estimates for Pakistan, limited to zinc, folic acid and vitamin A.
pakistan_wheat_estimate_sel = (
    (mult_estimates.location_name == "Pakistan")
    & (mult_estimates.vehicle == "wheat flour")
    & (mult_estimates.nutrient.isin(['zinc', 'folic acid', 'vitamin a']))
)
pakistan_estimates = mult_estimates[pakistan_wheat_estimate_sel]
pakistan_estimates

In [None]:
# Add a hand-entered 0% row for vitamin A in wheat flour, flagged 'lack of evidence'.
# NOTE: re-running this cell adds the row again.
pakistan_vitamin_a_row = pd.DataFrame({
    'location_name':['Pakistan'],
    'vehicle':['wheat flour'],
    'nutrient':['vitamin a'],
    'standard':['Unknown'],
    'value_description':['percent of population eating fortified vehicle'],
    'value_mean':[0],
    'estimation_status':['lack of evidence']
})

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
pakistan_estimates = pd.concat([pakistan_estimates, pakistan_vitamin_a_row])

In [None]:
# Add the Pakistan estimate rows; pd.concat replaces DataFrame.append (removed in pandas 2.0).
subset_data['Pakistan'] = pd.concat([subset_data['Pakistan'], pakistan_estimates])

## United Republic of Tanzania

In [None]:
tanz = check_one_country('United Republic of Tanzania')

In [None]:
# Mean of the numeric columns per vehicle / value type. numeric_only=True is explicit
# because pandas >= 2.0 raises on the string columns instead of silently dropping them.
tanz.groupby(['vehicle','value_description']).mean(numeric_only=True)

In [None]:
tanz[(tanz.vehicle=="maize flour") &
     (tanz.value_description=="percent of population eating fortified vehicle") &
     (tanz.nutrient.isin(nutrients))].sort_values('source_citation')

In [None]:
# these are all the same, take the aaron version
# NOTE(review): source_year is compared with the string '2011'; if read_csv parsed the
# column as a number this selects nothing -- confirm the column's dtype.
tanz_maize_fortified_sel = (
    (tanz.vehicle == "maize flour")
    & (tanz.value_description == "percent of population eating fortified vehicle")
    & (tanz.nutrient.isin(nutrients))
    & (tanz.source_year == '2011')
)
tanz.loc[tanz_maize_fortified_sel, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[tanz_maize_fortified_sel]]
)

tanz[tanz_maize_fortified_sel]

In [None]:
tanz[(tanz.vehicle=="maize flour") &
     (tanz.value_description=="percent of population eating industrially produced vehicle")].sort_values('source_citation')

In [None]:
# GFDx held the same values; took this over a number from a 2011 meta-analysis on the
# efficacy of different vehicles for fortification ("ehicles" typo fixed in the note).
tanz_maize_industrial_sel = (
    (tanz.vehicle == "maize flour")
    & (tanz.value_description == "percent of population eating industrially produced vehicle")
    & (tanz.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245770")
)
tanz.loc[tanz_maize_industrial_sel, "data_choice_notes"] = "GFDx held the same values; took this over a number from a 2011 metanalysis on the efficacy of different vehicles for fortification"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[tanz_maize_industrial_sel]]
)

tanz[tanz_maize_industrial_sel]

In [None]:
# these are the same, taking Aaron

tanz[(tanz.vehicle=="maize flour") &
     (tanz.value_description=="percent of population eating vehicle")].sort_values('source_citation')

In [None]:
# Sources agree on percent of population eating maize flour; keep the Aaron et al. rows.
tanz_maize_eating_sel = (
    (tanz.vehicle == "maize flour")
    & (tanz.value_description == "percent of population eating vehicle")
    & (tanz.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245770")
)
tanz.loc[tanz_maize_eating_sel, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[tanz_maize_eating_sel]]
)

tanz[tanz_maize_eating_sel]

In [None]:
## oil

In [None]:
tanz[(tanz.vehicle=="oil") &
     (tanz.value_description=="percent of population eating fortified vehicle") &
     (tanz.nutrient.isin(nutrients))].sort_values('source_citation')

In [None]:
## these are the same, taking aaron
tanz_oil_fortified_sel = (
    (tanz.vehicle == "oil")
    & (tanz.value_description == "percent of population eating fortified vehicle")
    & (tanz.nutrient.isin(nutrients))
    & (tanz.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245770")
)
tanz.loc[tanz_oil_fortified_sel, "data_choice_notes"] =  "All sources (Aaron and GFDx) agreed."

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[tanz_oil_fortified_sel]]
)

tanz[tanz_oil_fortified_sel]

In [None]:
tanz[(tanz.vehicle=="oil") &
     (tanz.value_description=="percent of population eating industrially produced vehicle")].sort_values('source_citation')

In [None]:
# GFDx held the same value (93%); took this over a number from a 2011 meta-analysis (54%)
# on the efficacy of different vehicles for fortification. The figures are percentages,
# as recorded in the note below.
tanz_oil_industrial_sel = (
    (tanz.vehicle == "oil")
    & (tanz.value_description == "percent of population eating industrially produced vehicle")
    & (tanz.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245770")
)
tanz.loc[tanz_oil_industrial_sel, "data_choice_notes"] = "GFDx held the same values (93%); took this over a number from a 2011 metanalysis (54%) on the efficacy of different vehicles for fortification"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[tanz_oil_industrial_sel]]
)

tanz[tanz_oil_industrial_sel]

In [None]:
# these are the same, taking Aaron

tanz[(tanz.vehicle=="oil") &
     (tanz.value_description=="percent of population eating vehicle")].sort_values('source_citation')

In [None]:
# Sources agree on percent of population eating oil; keep the Aaron et al. rows.
tanz_oil_eating_sel = (
    (tanz.vehicle == "oil")
    & (tanz.value_description == "percent of population eating vehicle")
    & (tanz.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245770")
)
tanz.loc[tanz_oil_eating_sel, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[tanz_oil_eating_sel]]
)

tanz[tanz_oil_eating_sel]

In [None]:
# Mean of the numeric columns per vehicle / value type. numeric_only=True is explicit
# because pandas >= 2.0 raises on the string columns instead of silently dropping them.
tanz.groupby(['vehicle','value_description']).mean(numeric_only=True)

In [None]:
tanz[(tanz.vehicle=="wheat flour") &
     (tanz.value_description=="percent of population eating fortified vehicle") &
     (tanz.nutrient.isin(nutrients))].sort_values('source_citation')

In [None]:
# all the same, taking the aaron source
tanz_wheat_fortified_sel = (
    (tanz.vehicle == "wheat flour")
    & (tanz.value_description == "percent of population eating fortified vehicle")
    & (tanz.nutrient.isin(nutrients))
    & (tanz.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245770")
)
tanz.loc[tanz_wheat_fortified_sel, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[tanz_wheat_fortified_sel]]
)

tanz[tanz_wheat_fortified_sel]

In [None]:
tanz[(tanz.vehicle=="wheat flour") &
     (tanz.value_description=="percent of population eating industrially produced vehicle")].sort_values('source_citation')

In [None]:
# GFDx held the same value (about 51%); took this over a number from a 2011 meta-analysis
# (33%) on the efficacy of different vehicles for fortification. The figures are
# percentages, as recorded in the note below.
tanz_wheat_industrial_sel = (
    (tanz.vehicle == "wheat flour")
    & (tanz.value_description == "percent of population eating industrially produced vehicle")
    & (tanz.source_citation == "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245770")
)
tanz.loc[tanz_wheat_industrial_sel, "data_choice_notes"] = "GFDx held the same values (51%); took this over a number from a 2011 metanalysis (33%) on the efficacy of different vehicles for fortification"

# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[tanz_wheat_industrial_sel]]
)

tanz[tanz_wheat_industrial_sel]

In [None]:
# Compare all sources for Tanzania / wheat flour / percent of population eating vehicle.
# The values are the same across sources, so the Aaron et al. row is taken in the next cell.

tanz[(tanz.vehicle=="wheat flour") &
     (tanz.value_description=="percent of population eating vehicle")].sort_values('source_citation')

In [None]:
# Tanzania / wheat flour / percent of population eating vehicle: keep the Aaron et al. row.
tanz_aaron_citation = "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245770"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (tanz.vehicle == "wheat flour")
    & (tanz.value_description == "percent of population eating vehicle")
    & (tanz.source_citation == tanz_aaron_citation)
)

# Record the rationale on the chosen rows.
tanz.loc[is_chosen, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['United Republic of Tanzania'] = pd.concat(
    [subset_data['United Republic of Tanzania'], tanz[is_chosen]]
)

# Display the rows that were just added.
tanz[is_chosen]

## Uganda

In [None]:
# Fetch the Uganda rows with check_one_country (defined earlier in the notebook, outside this
# section; presumably returns the extraction rows for one country — verify against its definition).
uganda = check_one_country('Uganda')

# Overview of which (vehicle, value_description) combinations have data, shown as group means.
uganda.groupby(['vehicle','value_description']).mean()

In [None]:
# List every candidate row for Uganda / maize flour / fortified coverage (nutrients of interest only),
# ordered by citation for comparison across sources.
uganda[(uganda.vehicle=="maize flour") &
     (uganda.value_description=="percent of population eating fortified vehicle") &
     (uganda.nutrient.isin(nutrients))].sort_values('source_citation')

In [None]:
# Uganda / maize flour / percent of population eating fortified vehicle.
# All sources report the same values, so keep the Aaron et al. rows (identified by source_link).
aaron_link = "https://doi.org/10.3945/jn.116.245753"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "maize flour")
    & (uganda.value_description == "percent of population eating fortified vehicle")
    & (uganda.nutrient.isin(nutrients))
    & (uganda.source_link == aaron_link)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]

In [None]:
# List every candidate row (all sources) for Uganda / maize flour / industrially produced.
uganda[(uganda.vehicle=="maize flour") &
     (uganda.value_description=="percent of population eating industrially produced vehicle")]

In [None]:
# Uganda / maize flour / percent of population eating industrially produced vehicle.
# Choice: keep the Aaron et al. (2017) row. GFDx held the same value (42%); it was preferred
# over a 2006 datapoint (7%) from a meta-analysis on the efficacy of different vehicles
# for fortification.
# NOTE(review): this comment previously quoted the figures in g/day, but the rows selected
# here are percentages (see value_description) — confirm the units.
# NOTE(review): the citation text below ends in ...245771 while the source_link used for this
# paper elsewhere in the notebook ends in ...245753; presumably an artifact of the extraction
# sheet. The string must match the data exactly, so it is kept as-is.
uganda_aaron_citation = "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245771"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "maize flour")
    & (uganda.value_description == "percent of population eating industrially produced vehicle")
    & (uganda.source_citation == uganda_aaron_citation)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "GFDx held the same values (42%); took this over a number from a 2006 datapoint (7%) from a metanalysis on the efficacy of different vehicles for fortification"

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]

In [None]:
# List every candidate row (all sources) for Uganda / maize flour / percent of population eating vehicle.
uganda[(uganda.vehicle=="maize flour") &
     (uganda.value_description=="percent of population eating vehicle")]

In [None]:
# Uganda / maize flour / percent of population eating vehicle.
# Sources report the same values, so keep the Aaron et al. row.
# NOTE(review): the citation text below ends in ...245771 while the source_link used for this
# paper elsewhere in the notebook ends in ...245753; presumably an artifact of the extraction
# sheet. The string must match the data exactly, so it is kept as-is.
uganda_aaron_citation = "Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245771"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "maize flour")
    & (uganda.value_description == "percent of population eating vehicle")
    & (uganda.source_citation == uganda_aaron_citation)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]

In [None]:
# Re-display the per-(vehicle, value_description) overview before moving on to oil.
uganda.groupby(['vehicle','value_description']).mean()

In [None]:
# List every candidate row for Uganda / oil / fortified coverage (nutrients of interest only),
# ordered by citation for comparison across sources.
uganda[(uganda.vehicle=="oil") & 
       (uganda.value_description=="percent of population eating fortified vehicle") &
       (uganda.nutrient.isin(nutrients))].sort_values('source_citation')

In [None]:
# Uganda / oil / percent of population eating fortified vehicle.
# Sources report the same values, so keep the Aaron et al. rows (identified by source_link).
aaron_link = "https://doi.org/10.3945/jn.116.245753"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "oil")
    & (uganda.value_description == "percent of population eating fortified vehicle")
    & (uganda.nutrient.isin(nutrients))
    & (uganda.source_link == aaron_link)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]


In [None]:
# List every candidate row (all sources) for Uganda / oil / industrially produced.
uganda[(uganda.vehicle=="oil") & 
       (uganda.value_description=="percent of population eating industrially produced vehicle")]

In [None]:
# Uganda / oil / percent of population eating industrially produced vehicle.
# Choice: keep the Aaron et al. (2017) row. GFDx held the same value (89%); it was preferred
# over a figure (54%) from a 2011 meta-analysis on the efficacy of different vehicles
# for fortification.
# NOTE(review): this comment previously quoted the figures in g/day, but the rows selected
# here are percentages (see value_description) — confirm the units.
aaron_link = "https://doi.org/10.3945/jn.116.245753"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "oil")
    & (uganda.value_description == "percent of population eating industrially produced vehicle")
    & (uganda.source_link == aaron_link)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "GFDx held the same values (89%); took this over a number from a 2011 metanalysis (54%) on the efficacy of different vehicles for fortification"

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]

In [None]:
# List every candidate row (all sources) for Uganda / oil / percent of population eating vehicle.
uganda[(uganda.vehicle=="oil") & 
       (uganda.value_description=="percent of population eating vehicle")]

In [None]:
# Uganda / oil / percent of population eating vehicle.
# Sources report the same values, so keep the Aaron et al. row (identified by source_link).
aaron_link = "https://doi.org/10.3945/jn.116.245753"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "oil")
    & (uganda.value_description == "percent of population eating vehicle")
    & (uganda.source_link == aaron_link)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]

In [None]:
# Re-display the per-(vehicle, value_description) overview before moving on to wheat flour.
uganda.groupby(['vehicle','value_description']).mean()

In [None]:
# Uganda / wheat flour / percent of population eating fortified vehicle.
# Sources report the same values, so keep the Aaron et al. rows (identified by source_link).
aaron_link = "https://doi.org/10.3945/jn.116.245753"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "wheat flour")
    & (uganda.value_description == "percent of population eating fortified vehicle")
    & (uganda.nutrient.isin(nutrients))
    & (uganda.source_link == aaron_link)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]

In [None]:
# List every candidate row (all sources) for Uganda / wheat flour / industrially produced.
uganda[(uganda.vehicle=="wheat flour") & 
       (uganda.value_description=="percent of population eating industrially produced vehicle")]

In [None]:
# Uganda / wheat flour / percent of population eating industrially produced vehicle.
# Choice: keep the Aaron et al. (2017) row. GFDx held the same value (10.6, recorded as 11%);
# it was preferred over a figure (8.5, recorded as 9%) from a 2016 meta-analysis on the
# efficacy of different vehicles for fortification.
# NOTE(review): this comment previously quoted the figures in g/day, but the rows selected
# here are percentages (see value_description) — confirm the units.
aaron_link = "https://doi.org/10.3945/jn.116.245753"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "wheat flour")
    & (uganda.value_description == "percent of population eating industrially produced vehicle")
    & (uganda.source_link == aaron_link)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "GFDx held the same values (11%); took this over a number from a 2016 metanalysis (9%) on the efficacy of different vehicles for fortification"

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]

In [None]:
# Compare all sources for Uganda / wheat flour / percent of population eating vehicle.
# The values are the same across sources, so the Aaron et al. row is taken in the next cell.

uganda[(uganda.vehicle=="wheat flour") & 
       (uganda.value_description=="percent of population eating vehicle")]

In [None]:
# Uganda / wheat flour / percent of population eating vehicle: keep the Aaron et al. row
# (identified by source_link).
aaron_link = "https://doi.org/10.3945/jn.116.245753"

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (uganda.vehicle == "wheat flour")
    & (uganda.value_description == "percent of population eating vehicle")
    & (uganda.source_link == aaron_link)
)

# Record the rationale on the chosen rows.
uganda.loc[is_chosen, "data_choice_notes"] = "All sources (Aaron and GFDx) agreed."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['Uganda'] = pd.concat([subset_data['Uganda'], uganda[is_chosen]])

# Display the rows that were just added.
uganda[is_chosen]

## South Africa

In [None]:
# Fetch the South Africa rows with check_one_country (defined earlier in the notebook, outside this
# section; presumably returns the extraction rows for one country — verify against its definition).
safrica = check_one_country('South Africa')

# Overview of which (vehicle, value_description) combinations have data, shown as group means.
safrica.groupby(['vehicle','value_description']).mean()

In [None]:
# List every candidate row for South Africa / maize flour / fortified coverage, ordered by citation.
# Note: unlike the selection in the next cell, this view is not restricted to `nutrients`.
safrica[(safrica.vehicle=="maize flour") &
     (safrica.value_description=="percent of population eating fortified vehicle")].sort_values('source_citation')

In [None]:
# South Africa / maize flour / percent of population eating fortified vehicle.
# Only one source exists; its rows are kept as-is here and population-weighted later
# in the "pop-weight subnationals" section.

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (safrica.vehicle == "maize flour")
    & (safrica.value_description == "percent of population eating fortified vehicle")
    & (safrica.nutrient.isin(nutrients))
)

# Record the rationale on the chosen rows.
safrica.loc[is_chosen, "data_choice_notes"] = "Only one source."

# Select after annotating so the appended rows carry the note.
chosen_rows = safrica[is_chosen].sort_values('source_citation')

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['South Africa'] = pd.concat([subset_data['South Africa'], chosen_rows])

# Display the rows that were just added.
chosen_rows

In [None]:
# South Africa / maize flour / percent of population eating industrially produced vehicle.
# Only one source (Aaron) exists, so take it.

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (safrica.vehicle == "maize flour")
    & (safrica.value_description == "percent of population eating industrially produced vehicle")
)

# Record the rationale on the chosen rows.
safrica.loc[is_chosen, "data_choice_notes"] = "Only one source."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['South Africa'] = pd.concat([subset_data['South Africa'], safrica[is_chosen]])

# Display the rows that were just added.
safrica[is_chosen]

In [None]:
# South Africa / maize flour / percent of population eating vehicle.
# Only one source (Aaron) exists, so take it.

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (safrica.vehicle == "maize flour")
    & (safrica.value_description == "percent of population eating vehicle")
)

# Record the rationale on the chosen rows.
safrica.loc[is_chosen, "data_choice_notes"] = "Only one source."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['South Africa'] = pd.concat([subset_data['South Africa'], safrica[is_chosen]])

# Display the rows that were just added.
safrica[is_chosen]

In [None]:
# South Africa / wheat flour / percent of population eating fortified vehicle.
# The same source was extracted twice; there is only one source, so take it and drop
# the exact duplicate rows.

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (safrica.vehicle == "wheat flour")
    & (safrica.value_description == "percent of population eating fortified vehicle")
    & (safrica.nutrient.isin(nutrients))
)

# Record the rationale on the chosen rows.
safrica.loc[is_chosen, "data_choice_notes"] = "Only one source."

# Select after annotating so the appended rows carry the note; then de-duplicate.
chosen_rows = safrica[is_chosen].sort_values('source_citation').drop_duplicates()

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['South Africa'] = pd.concat([subset_data['South Africa'], chosen_rows])

# Display the rows that were just added.
chosen_rows

In [None]:
# South Africa / wheat flour / percent of population eating industrially produced vehicle.
# Only one source (Aaron) exists, so take it.

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (safrica.vehicle == "wheat flour")
    & (safrica.value_description == "percent of population eating industrially produced vehicle")
)

# Record the rationale on the chosen rows.
safrica.loc[is_chosen, "data_choice_notes"] = "Only one source."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['South Africa'] = pd.concat([subset_data['South Africa'], safrica[is_chosen]])

# Display the rows that were just added.
safrica[is_chosen]

In [None]:
# South Africa / wheat flour / percent of population eating vehicle.
# Only one source (Aaron) exists, so take it.

# Build the row filter once so the note, the append and the display cannot drift apart.
is_chosen = (
    (safrica.vehicle == "wheat flour")
    & (safrica.value_description == "percent of population eating vehicle")
)

# Record the rationale on the chosen rows.
safrica.loc[is_chosen, "data_choice_notes"] = "Only one source."

# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['South Africa'] = pd.concat([subset_data['South Africa'], safrica[is_chosen]])

# Display the rows that were just added.
safrica[is_chosen]

In [None]:
# Load the three regression-estimate tables (dated 3/15/2021) from the data_prep outputs directory.
# They are used below to fill South Africa values (labelled estimation_status == "regression").
outputs_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/'

pct_eating = pd.read_csv(outputs_dir + 'pct_eating_regression_estimates_3_15_2021.csv')
pct_ind_prod = pd.read_csv(outputs_dir + 'pct_eating_ind_prod_regression_estimates_3_15_2021.csv')
pct_fortified = pd.read_csv(outputs_dir + 'pct_eating_fortified_regression_estimates_3_15_2021.csv')

In [None]:
# Inspect the South Africa rows of the "percent eating" regression estimates.
pct_eating[pct_eating.location_name=="South Africa"]

In [None]:
# Inspect the South Africa rows of the "industrially produced" regression estimates.
pct_ind_prod[pct_ind_prod.location_name=="South Africa"]

In [None]:
# Inspect the South Africa rows of the "fortified" regression estimates.
pct_fortified[pct_fortified.location_name=="South Africa"]


In [None]:
# Combine the three South Africa regression-estimate tables into one long table with the
# same (value_description, value_mean) layout as the extracted data.
merge_keys = ['location_name','vehicle']

sa_eating = pct_eating[pct_eating.location_name=="South Africa"]
sa_ind_prod = pct_ind_prod[pct_ind_prod.location_name=="South Africa"]
sa_fortified = pct_fortified[pct_fortified.location_name=="South Africa"]

# Wide: one row per (location, vehicle), one column per estimated quantity.
safrica_oil_estimates = sa_eating.merge(sa_ind_prod, on = merge_keys)
safrica_oil_estimates = safrica_oil_estimates.merge(sa_fortified, on = merge_keys)

# Long: the former column names become value_description, their cells become value_mean.
safrica_oil_estimates = safrica_oil_estimates.melt(
    id_vars = merge_keys,
    var_name = "value_description",
    value_name = "value_mean",
)

# Column names use underscores; the extraction sheet uses spaces in value_description.
safrica_oil_estimates['value_description'] = safrica_oil_estimates['value_description'].map(
    lambda description: description.replace("_"," ")
)

# Mark these rows as modelled rather than extracted.
safrica_oil_estimates['estimation_status'] = "regression"

In [None]:
# Inspect the reshaped regression estimates.
safrica_oil_estimates

In [None]:
# Fortified-coverage rows are attributed to vitamin a; every other row gets the
# placeholder nutrient 'na'.
is_fortified_row = safrica_oil_estimates.value_description=="percent of population eating fortified vehicle"
safrica_oil_estimates.loc[is_fortified_row, 'nutrient'] = 'vitamin a'
safrica_oil_estimates.loc[~is_fortified_row, 'nutrient'] = 'na'

In [None]:
# Inspect the estimates again, now with the nutrient column filled in.
safrica_oil_estimates

In [None]:
# Add the regression-based estimates to the South Africa selection.
# pd.concat gives the same result as DataFrame.append, which was removed in pandas 2.0.
subset_data['South Africa'] = pd.concat([subset_data['South Africa'], safrica_oil_estimates])

## pop-weight subnationals

In [None]:
# scols is currently just an alias of rcols (defined earlier in the notebook); the extra
# columns in the trailing comment are disabled.
scols = rcols # + ['source_link','source_citation','estimation_status','sub_population']

In [None]:
# Which subnational units appear in the extraction sheet for the locations of interest?
# (location_names is defined earlier in the notebook.)
df[df.location_name.isin(location_names)].subnational_name.unique()

In [None]:
# Location hierarchy from the shared db_queries helper (imported as get_locs).
loc_metadata = get_locs(location_set_id=35, gbd_round_id=6, decomp_step="step4")

# Build population weights for the subnational units that need to be combined.
# The parent_id filter disambiguates names that may occur under more than one country;
# 165 and 196 are presumably the ids of Pakistan and South Africa — TODO confirm.
subnational_names = ['Balochistan', 'Punjab', 'Sindh', 'Eastern Cape', 'Gauteng']
parent_ids = [165, 196]
is_target_subnat = loc_metadata.location_name.isin(subnational_names) & (loc_metadata.parent_id.isin(parent_ids))

subnats = loc_metadata[is_target_subnat][['location_id','location_name','parent_id']].rename(
    columns = {'location_id':'subnational_id', 'location_name':'subnational_name'}
)

# 2017 population of each unit (age_group_id=22 / sex_id=3 presumably mean all ages,
# both sexes — TODO confirm against GBD id tables).
subnat_pop = get_population(
    age_group_id=22,
    location_id=list(subnats.subnational_id),
    year_id=2017,
    sex_id=3,
    gbd_round_id=6,
    decomp_step='step5',
)

subnats = subnats.merge(subnat_pop, left_on = 'subnational_id', right_on = 'location_id')
subnats = subnats[['subnational_id','subnational_name','population','parent_id']]

# Weight = unit population / total population of the listed units sharing the same parent,
# so the weights sum to 1 within each parent over the units listed above only.
subnats['pop_denom'] = subnats.groupby('parent_id')['population'].transform('sum')
subnats['subnat_pop_weight'] = subnats.population / subnats.pop_denom

In [None]:
# Inspect the subnational populations and weights.
subnats

In [None]:
# Work on the Pakistan selection. This binds the same DataFrame object (no copy), so the
# in-place .loc assignments in the following cells also modify subset_data['Pakistan'].
tmp = subset_data['Pakistan']

In [None]:
# Replace missing provenance/status labels with the string 'na'.
for label_col in ('source_citation', 'source_link', 'estimation_status'):
    tmp.loc[tmp[label_col].isna(), label_col] = 'na'

In [None]:
# The CI bounds may hold the string 'na'; turn it into a real NaN so the columns
# can be cast to float later.
for bound_col in ('value_025_percentile', 'value_975_percentile'):
    tmp.loc[tmp[bound_col]=='na', bound_col] = np.nan

In [None]:
# Fill missing estimation_status with 'na'. The same statement also appears two cells
# above, so this is a no-op when the cells are run in order.
tmp.loc[tmp.estimation_status.isna(),'estimation_status'] = 'na'

In [None]:
# Flag every row whose rcols key occurs more than once (keep=False marks all members of a
# duplicated group). These rows are treated below as subnational estimates to be combined.
tmp['is_dupl'] = tmp.duplicated(subset = rcols, keep = False)

In [None]:
# Attach the population weight of each subnational unit. No `on` is given, so pandas joins on
# the columns the two frames share (subnational_name from the right-hand side); the left join
# leaves subnat_pop_weight as NaN for rows without a matching unit.
tmp = tmp.merge(subnats[['subnational_name','subnat_pop_weight']], how = 'left')

In [None]:
# Make sure the weight and the value columns are numeric before doing arithmetic on them.
for numeric_col in ('subnat_pop_weight', 'value_mean', 'value_025_percentile', 'value_975_percentile'):
    tmp[numeric_col] = tmp[numeric_col].astype(float)

In [None]:
# Scale the flagged (subnational) rows by their population weight; the weighted rows are
# summed per rcols group in a later cell. Unflagged rows are left untouched.
dupl_rows = tmp.is_dupl
for weighted_col in ('value_mean', 'value_025_percentile', 'value_975_percentile'):
    tmp.loc[dupl_rows, weighted_col] = tmp.loc[dupl_rows, weighted_col] * tmp.loc[dupl_rows, 'subnat_pop_weight']

In [None]:
# Rows that were not flagged as duplicates get the placeholder subnational_name 'na'.
tmp.loc[~tmp.is_dupl,'subnational_name'] = 'na'

In [None]:
# Collapse the weighted rows: every row receives the sum over its rcols group, so the
# subnational rows of a group all end up carrying the same combined value.
#
# min_count=1 keeps a group whose values are all missing as NaN. With the default
# (min_count=0) such a group sums to 0.0, which turned every missing CI bound into a
# literal 0 and stopped the later "impute all missing CIs" step from recognising it as missing.
# Only the three value columns are summed; the remaining columns are not needed here.
value_cols = ['value_mean', 'value_025_percentile', 'value_975_percentile']
tmp[value_cols] = tmp.groupby(rcols)[value_cols].transform('sum', min_count=1)

In [None]:
# Keep only the reporting columns and collapse the now-identical subnational rows.
# source_citation and subnational_name are not in this list, so they are dropped here.
final_cols = [
    'location_name', 'vehicle', 'value_description', 'nutrient',
    'estimation_status', 'source_link', 'source_year', 'sub_population',
    'value_mean', 'value_025_percentile', 'value_975_percentile',
    'data_choice_notes',
]
tmp = tmp[final_cols].drop_duplicates()

In [None]:
# Store the population-weighted Pakistan table back into the per-country dict.
subset_data['Pakistan'] = tmp

In [None]:
## south africa

In [None]:
# Work on the South Africa selection (same object as subset_data['South Africa'], no copy)
# and replace missing provenance labels with the string 'na'.
tmp = subset_data['South Africa']
tmp.loc[tmp.source_citation.isna(),'source_citation'] = 'na'
# Fix: the mask must test source_link itself. The previous mask re-tested source_citation,
# which has no missing values after the line above, so source_link was never filled.
tmp.loc[tmp.source_link.isna(),'source_link'] = 'na'

In [None]:
# Flag every row whose rcols key occurs more than once (keep=False marks all members of a
# duplicated group). These rows are treated below as subnational estimates to be combined.
tmp['is_dupl'] = tmp.duplicated(subset = rcols, keep = False)

In [None]:
# Attach the population weight of each subnational unit. No `on` is given, so pandas joins on
# the columns the two frames share (subnational_name from the right-hand side); the left join
# leaves subnat_pop_weight as NaN for rows without a matching unit.
tmp = tmp.merge(subnats[['subnational_name','subnat_pop_weight']], how = 'left')

In [None]:
# Make sure the weight and the value columns are numeric before doing arithmetic on them.
for numeric_col in ('subnat_pop_weight', 'value_mean', 'value_025_percentile', 'value_975_percentile'):
    tmp[numeric_col] = tmp[numeric_col].astype(float)

In [None]:
# Scale the flagged (subnational) rows by their population weight; the weighted rows are
# summed per rcols group in a later cell. Unflagged rows are left untouched.
dupl_rows = tmp.is_dupl
for weighted_col in ('value_mean', 'value_025_percentile', 'value_975_percentile'):
    tmp.loc[dupl_rows, weighted_col] = tmp.loc[dupl_rows, weighted_col] * tmp.loc[dupl_rows, 'subnat_pop_weight']

In [None]:
# Rows that were not flagged as duplicates get the placeholder subnational_name 'na'.
tmp.loc[~tmp.is_dupl,'subnational_name'] = 'na'

In [None]:
# Collapse the weighted rows: every row receives the sum over its rcols group, so the
# subnational rows of a group all end up carrying the same combined value.
#
# min_count=1 keeps a group whose values are all missing as NaN. With the default
# (min_count=0) such a group sums to 0.0, which turned every missing CI bound (e.g. on the
# regression-estimate rows, which carry no CI) into a literal 0 and stopped the later
# "impute all missing CIs" step from recognising it as missing.
# Only the three value columns are summed; the remaining columns are not needed here.
value_cols = ['value_mean', 'value_025_percentile', 'value_975_percentile']
tmp[value_cols] = tmp.groupby(rcols)[value_cols].transform('sum', min_count=1)

In [None]:
# Keep only the reporting columns and collapse the now-identical subnational rows.
# source_citation and subnational_name are not in this list, so they are dropped here.
final_cols = [
    'location_name', 'vehicle', 'value_description', 'nutrient',
    'estimation_status', 'source_link', 'source_year', 'sub_population',
    'value_mean', 'value_025_percentile', 'value_975_percentile',
    'data_choice_notes',
]
tmp = tmp[final_cols].drop_duplicates()

In [None]:
# Store the population-weighted South Africa table back into the per-country dict.
subset_data['South Africa'] = tmp

## Check for missingness

In [None]:
# Stack the per-country selections into a single table (original row indices are kept).
all_data = pd.concat(list(subset_data.values()))

In [None]:
# Sanity check: list the distinct data-choice notes that were recorded.
all_data.data_choice_notes.unique()

In [None]:
# Any nutrient outside the four of interest (including missing values) is replaced by 'na'.
# The list holds the same four values as `nutrients` defined at the top of the notebook.
all_data.loc[~(all_data.nutrient.isin(['vitamin a','iron','zinc','folic acid'])),'nutrient'] = 'na'

In [None]:
# Inspect the key columns plus values, CI bounds and sub_population.
all_data[rcols + ['value_mean','value_025_percentile','value_975_percentile','sub_population']]

In [None]:
# Left-join the selected values onto `target` (defined earlier in the notebook; presumably the
# full list of required rcols combinations — verify). Combinations without data get NaN value_mean.
check = target.merge(all_data[rcols + ['value_mean']], on = rcols, how = 'left')

In [None]:
# Show the target combinations that have no value yet (expected to be empty).
check[check.value_mean.isna()]

In [None]:
# Fail loudly if any target combination still lacks a value.
missing_combos = check[check.value_mean.isna()]
assert len(missing_combos) == 0, "there are target loc/vehcile/val/nutrient combos youre missing"

In [None]:
# List the available columns before choosing the output layout.
all_data.columns

In [None]:
# Assemble the output table: key columns, values with CI bounds, and provenance fields,
# sorted by and indexed on the key columns.
output_cols = rcols + [
    'value_mean', 'value_025_percentile', 'value_975_percentile',
    'sub_population', 'estimation_status',
    'source_citation', 'source_link', 'data_choice_notes',
]
output = all_data[output_cols].sort_values(rcols).set_index(rcols)

In [None]:
## TODO

In [None]:
# Show rows with no estimation_status that are missing a citation or a link.
output[(output.estimation_status.isna()) & ((output.source_citation.isna()) | (output.source_link.isna()))]

In [None]:
# Look up the citation text stored in the extraction sheet for the Aaron et al. link.
# Takes the first distinct value; raises IndexError if the link does not occur in df.
df[df.source_link=="https://doi.org/10.3945/jn.116.245753"].source_citation.unique()[0]

In [None]:
# Restore the citation text on Aaron et al. rows that lost it (the pop-weighting step
# drops source_citation), identified by their source_link.
# NOTE(review): the mask also requires estimation_status to be NaN. The Pakistan rows had
# their missing estimation_status filled with 'na' before pop-weighting, so they appear to
# be skipped here — confirm whether that is intended.
aaron_link = "https://doi.org/10.3945/jn.116.245753"
aaron_citation = 'Grant J Aaron, Valerie M Friesen, Svenja Jungjohann, Greg S Garrett, Lynnette M Neufeld, Mark Myatt, Coverage of Large-Scale Food Fortification of Edible Oil, Wheat Flour, and Maize Flour Varies Greatly by Vehicle and Country but Is Consistently Lower among the Most Vulnerable: Results from Coverage Surveys in 8 Countries, The Journal of Nutrition, Volume 147, Issue 5, May 2017, Pages 984S–994S, https://doi.org/10.3945/jn.116.245753'

needs_citation = (
    output.estimation_status.isna()
    & output.source_citation.isna()
    & (output.source_link == aaron_link)
)
output.loc[needs_citation, 'source_citation'] = aaron_citation

In [None]:
# Move the rcols index levels back into ordinary columns for the CI imputation below.
output = output.reset_index()

In [None]:
## impute all missing CIs
#
# Approach: for every vehicle compute r, the average relative CI width
# (upper - lower) / mean over the rows that have a CI. A row whose bound is missing, or
# whose mean falls outside its own bounds, gets mean -/+ r*mean/2, clipped to [0, 100].

# Clean the value columns: the string 'na' becomes NaN, then cast to float.
for ci_col in ['value_mean', 'value_025_percentile', 'value_975_percentile']:
    output.loc[output[ci_col]=='na', ci_col] = np.nan
    output[ci_col] = output[ci_col].astype(float)

# Relative CI width per row.
output['scale_over_mean'] = (output.value_975_percentile - output.value_025_percentile) / output.value_mean
# A mean of 0 with a non-degenerate CI yields +/-inf, which would make the vehicle-level
# average infinite and every imputed CI for that vehicle meaningless. Treat those ratios
# as missing so they are skipped by the mean below.
output['scale_over_mean'] = output['scale_over_mean'].replace([np.inf, -np.inf], np.nan)

# Average relative width per vehicle; vehicles without any usable CI are dropped.
r = output[['vehicle','scale_over_mean']].groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'})

# add uncertainty
output = output.merge(r, on = 'vehicle', how = 'outer')
half_width = (output.r * output.value_mean)/2
output['lower'] = np.clip(output.value_mean - half_width, 0, 100)
output['upper'] = np.clip(output.value_mean + half_width, 0, 100)

# Rows whose bound is missing or inconsistent with the mean. Computed once, before any
# bound is overwritten; each mask only depends on the mean and on its own bound.
needs_lower = (output.value_mean < output.value_025_percentile) | (output.value_025_percentile.isna())
needs_upper = (output.value_mean > output.value_975_percentile) | (output.value_975_percentile.isna())

# Provenance of the CI: 'modeling' if either bound is imputed, otherwise 'extraction'.
output.loc[needs_lower | needs_upper,'CI_source'] = "modeling"
output.loc[output.CI_source.isna(),'CI_source'] = 'extraction'

# Overwrite only the bounds that need it.
output.loc[needs_lower,'value_025_percentile'] = output.loc[needs_lower].lower
output.loc[needs_upper,'value_975_percentile'] = output.loc[needs_upper].upper

# Drop the helper columns and restore the key index.
output = output.drop(columns=['r','lower','upper','scale_over_mean']).set_index(rcols)

output.loc[output.estimation_status.isna(),'estimation_status'] = 'na'

In [None]:
# Write the full table (including data_choice_notes and CI_source) to a scratch location.
# The index (rcols) is written as leading columns.
save_path_tmp = '/ihme/homes/beatrixh/repos/scratch/pop_coverage_data_with_data_choice_notes_3_17_2021.csv'
output.to_csv(save_path_tmp)

In [None]:
## add loc_ids

In [None]:
# Attach location_id by location_name. how='right' keeps every output row; rows whose name
# is not found in loc_metadata get a NaN location_id.
# NOTE(review): if a location_name occurs more than once in loc_metadata, the matching
# output rows would be duplicated — confirm names are unique for these countries.
output = loc_metadata[['location_id','location_name']].merge(output.reset_index(), on = 'location_name', how = 'right')

In [None]:
# Write the complete tier-2 table (all columns, no index column) to the data_prep outputs.
save_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/population_coverage_data_tier2_locs_3_17_2021.csv'
output.to_csv(save_path, index = False)

In [None]:
# Quick look at the first rows of the saved table.
output.head()

In [None]:
# Write a slimmed-down version containing only identifiers, keys, values and CI bounds
# (no provenance or notes columns).
formatted_cols = ['location_id','location_name','sub_population','vehicle','value_description','nutrient','value_mean','value_025_percentile','value_975_percentile']

save_path_formatted = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/population_coverage_data_tier2_input_data.csv'
output[formatted_cols].to_csv(save_path_formatted, index = False)