In [1]:
from db_queries import get_population, get_ids
from db_queries import get_location_metadata as get_locs

In [2]:
import pandas as pd, numpy as np

# LSFF: choose population coverage data by hand for tier 1 countries

## vehicles: Wheat flour, maize flour, oil


## countries: Ethiopia, India, Nigeria

In [3]:
nutrients = ['iron','zinc','folic acid','vitamin a']

In [4]:
data_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/extraction_sheet_lsff_03_24_2021.3.csv'
assm_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/extraction_sheet_lsff_assumed_coverage_03_24_2021.csv'

df = pd.read_csv(data_path)

In [5]:
df.loc[df.nutrient=="folic acid, folate, b9",'nutrient']= 'folic acid'

In [6]:
assum = pd.read_csv(assm_path)

In [7]:
assum.location_name.unique()

array(['Ethiopia', 'Myanmar', 'India'], dtype=object)

In [8]:
assum.loc[assum.nutrient=="folic acid, folate, b9",'nutrient']= 'folic acid'

In [9]:
df['estimation_status'] = 'na'

In [10]:
df['data_choice_notes'] = ""

In [11]:
mult_estimates_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/lsff_data_estimated_03_22_2021.csv'

mult_estimates = pd.read_csv(mult_estimates_path)

In [12]:
# Reformat the multiplicative estimates from wide (one B and one C column per
# row) to long (one row per value_description).

# Keep B / C only where the matching *_estimate flag equals 1.0; everything
# else is blanked so that the dropna() below removes it.
# NOTE(review): presumably *_estimate == 1.0 marks a value that was estimated
# rather than extracted -- confirm against the notebook that wrote this file.
mult_estimates.loc[(mult_estimates.B_estimate != 1.0), 'B'] = np.nan
mult_estimates.loc[(mult_estimates.C_estimate != 1.0), 'C'] = np.nan

mult_estimates = pd.melt(
    mult_estimates,
    id_vars=['location_name', 'vehicle', 'nutrient', 'standard'],
    value_vars=['B', 'C'],
    var_name='value_description',
    value_name='value_mean',
).dropna()

# B rows are not nutrient-specific. Use the lowercase 'na' sentinel that the
# rest of this notebook uses for "no nutrient"; unlike 'NA' it is not turned
# into NaN by pandas.read_csv's default na_values when the data is re-read.
mult_estimates.loc[(mult_estimates.value_description == "B"), 'nutrient'] = 'na'

# Once the nutrient is blanked, the per-nutrient B rows collapse to duplicates.
mult_estimates = mult_estimates.drop_duplicates()

mult_estimates.value_description = mult_estimates.value_description.map({
    'B': 'percent of population eating industrially produced vehicle',
    'C': 'percent of population eating fortified vehicle'
})

In [13]:
mult_estimates['estimation_status'] = 'multiplicative'

In [14]:
def prep_reg_estimates(path, include_uncertainty=False):
    """Summarise regression draws into one row per (location_name, vehicle).

    Reads the CSV at ``path``, averages each of the ``draw_0`` .. ``draw_499``
    columns within every (location_name, vehicle) group, and then takes the
    mean across the 500 draws.

    Parameters
    ----------
    path : str
        CSV file containing ``location_name``, ``vehicle`` and the 500 draw
        columns.
    include_uncertainty : bool, default False
        When True, also return the 2.5th and 97.5th percentiles across draws
        as ``value_025_percentile`` / ``value_975_percentile``. The default
        keeps the original three-column return.

    Returns
    -------
    pandas.DataFrame
        Columns ``location_name``, ``vehicle``, ``value_mean`` (plus the two
        percentile columns when requested).
    """
    draws = [f'draw_{i}' for i in range(500)]
    group_cols = ['location_name', 'vehicle']

    estimates = pd.read_csv(path)
    # Select the draw columns before averaging: a bare groupby().mean() also
    # tries to average any text columns, which raises on pandas >= 2.0.
    estimates = estimates.groupby(group_cols)[draws].mean().reset_index()
    estimates['value_mean'] = estimates[draws].mean(axis=1)

    out_cols = group_cols + ['value_mean']
    if include_uncertainty:
        estimates['value_025_percentile'] = estimates[draws].quantile(.025, axis=1)
        estimates['value_975_percentile'] = estimates[draws].quantile(.975, axis=1)
        out_cols = out_cols + ['value_025_percentile', 'value_975_percentile']

    return estimates[out_cols]

In [15]:
eating_oil_path_reg = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_oil_regression_estimates_3_22_2021.csv'
eating_wheat_path_reg = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_wheat_regression_estimates_3_22_2021.csv'

In [16]:
eating_oil_reg = prep_reg_estimates(eating_oil_path_reg)
eating_wheat_reg = prep_reg_estimates(eating_wheat_path_reg)

In [17]:
## load legal combos
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'

# Load the three pickled lookup tables in a fixed order, then unpack them into
# the names used by the rest of the notebook.
loaded_maps = []
for pickle_name in ('lsff_vehicle_nutrient_pairs.pickle',
                    'lsff_country_vehicle_pairs.pickle',
                    'lsff_vehicle_country_pairs.pickle'):
    with open(data_prep_dir + pickle_name, 'rb') as handle:
        loaded_maps.append(pickle.load(handle))

vehicle_nutrient_map, country_vehicle_map, vehicle_country_map = loaded_maps

In [18]:
location_names = ['Ethiopia','Nigeria','India']
vehicles = ['maize flour','wheat flour','oil']
nutrients = ['folic acid','iron','zinc','vitamin a']

In [19]:
# One row per (country, vehicle) pair we need: every vehicle listed for the
# country in country_vehicle_map, restricted to the vehicles of interest.
target_high_level = (
    pd.DataFrame(
        [(loc, v) for loc in location_names for v in country_vehicle_map[loc]],
        columns=['location_name', 'vehicle'],
    )
    .sort_values(['location_name', 'vehicle'])
    .loc[lambda pairs: pairs.vehicle.isin(vehicles)]
    .set_index(['location_name', 'vehicle'])
)

target_high_level

location_name,vehicle
Ethiopia,maize flour
Ethiopia,oil
Ethiopia,wheat flour
India,oil
India,wheat flour
Nigeria,maize flour
Nigeria,oil
Nigeria,wheat flour


In [20]:
target_cols = ['location_name', 'vehicle', 'nutrient', 'value_description']

# Fortified-vehicle coverage rows: one per (country, vehicle, nutrient)
# combination from the two lookup maps, kept only for the nutrients of interest.
target_a = pd.DataFrame(
    [(loc, vehicle, nutrient, 'percent of population eating fortified vehicle')
     for loc in location_names
     for vehicle in country_vehicle_map[loc]
     for nutrient in vehicle_nutrient_map[vehicle]],
    columns=target_cols,
).sort_values(['location_name', 'vehicle', 'nutrient'])
target_a = target_a[(target_a.nutrient.isin(nutrients))]

# Vehicle-level coverage rows are not nutrient-specific, so nutrient is 'na'.
target_b = pd.DataFrame(
    [(loc, vehicle, 'na', val)
     for loc in location_names
     for vehicle in country_vehicle_map[loc]
     for val in ['percent of population eating industrially produced vehicle',
                 'percent of population eating vehicle']],
    columns=target_cols,
)

sortvars = ['location_name', 'vehicle', 'value_description', 'nutrient']
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat works on every version.
target = pd.concat([target_a, target_b])
target = (
    target[(target.vehicle.isin(vehicles)) & (target.nutrient.isin(nutrients + ['na']))]
    .sort_values(sortvars)
    .set_index(sortvars)
)

In [21]:
target = target.reset_index()
rcols = target.columns.tolist()

In [22]:
check_cols = ['location_id','location_name','urbanicity','subnational_name','vehicle','value_description','nutrient','value_mean','value_025_percentile',
       'value_975_percentile','sub_population','source_year','notes','source_citation','source_link','inclusion_justification','included','data_choice_notes']

def filter_data(country, vehicle, val):
    """Return the extracted rows for one country / vehicle / value_description.

    Reads the module-level ``df`` and keeps rows whose ``location_name``,
    ``vehicle`` and ``value_description`` equal the arguments and whose
    ``value_mean`` is not missing.

    Parameters
    ----------
    country : str
        Value matched against ``df.location_name``.
    vehicle : str
        Value matched against ``df.vehicle``.
    val : str
        Value matched against ``df.value_description``.

    Returns
    -------
    pandas.DataFrame
        The matching rows with exactly the ``check_cols`` columns, in that
        order. Columns of ``check_cols`` that ``df`` lacks are filled with NaN.
    """
    keep = (
        (df.location_name == country)
        & (df.vehicle == vehicle)
        & (df.value_description == val)
        & (df.value_mean.notna())
    )
    # reindex rather than df.loc[keep, check_cols]: .loc with labels missing
    # from df only warns on old pandas and raises KeyError from pandas 1.0 on.
    # reindex gives the same NaN-filled columns on every version.
    return df.loc[keep].reindex(columns=check_cols)

In [23]:
def check_one_country(country):
    """Stack filter_data() results for every vehicle / value_description pair.

    Loops over a fixed list of six vehicles and the three coverage
    descriptions (vehicle-major order) and concatenates the per-pair frames
    for ``country`` into a single DataFrame.
    """
    candidate_vehicles = ('oil', 'wheat flour', 'salt', 'maize flour', 'rice', 'bouillon')
    gold_descriptions = (
        'percent of population eating fortified vehicle',
        'percent of population eating industrially produced vehicle',
        'percent of population eating vehicle',
    )

    pieces = []
    for one_vehicle in candidate_vehicles:
        for description in gold_descriptions:
            pieces.append(filter_data(country, one_vehicle, description))
    return pd.concat(pieces)

In [24]:
usecols = ['location_id','location_name','subnational_name','vehicle','value_description','nutrient','value_mean', 'value_025_percentile',
       'value_975_percentile']
subset_data = {}

In [25]:
for i in location_names:
    subset_data[i] = pd.DataFrame()

## Ethiopia

In [26]:
ethiopia = check_one_country("Ethiopia")

ethiopia.value_description.unique()

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


array(['percent of population eating industrially produced vehicle',
       'percent of population eating vehicle',
       'percent of population eating fortified vehicle'], dtype=object)

In [36]:
ethiopia[(ethiopia.vehicle=="maize flour")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes


In [37]:
assum[(assum.location_name=="Ethiopia") & (assum.vehicle=="maize flour")]

Unnamed: 0,location_id,location_name,vehicle,nutrient,value_description,value_mean,value_025_percentile,value_975_percentile,sub_population,notes,Unnamed: 10
9,179,Ethiopia,maize flour,iron,percent of population eating fortified vehicle,0,0,0,total population,GFDx indicates no maize flour fortification st...,Added by paulina after reviewing input data/fo...
10,179,Ethiopia,maize flour,zinc,percent of population eating fortified vehicle,0,0,0,total population,GFDx indicates no maize flour fortification st...,Added by paulina after reviewing input data/fo...
11,179,Ethiopia,maize flour,folic acid,percent of population eating fortified vehicle,0,0,0,total population,GFDx indicates no maize flour fortification st...,Added by paulina after reviewing input data/fo...


In [39]:
# Add the assumed-coverage rows for Ethiopia / maize flour to the chosen data.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same rows again.
subset_data['Ethiopia'] = pd.concat([
    subset_data['Ethiopia'],
    assum[(assum.location_name=="Ethiopia") & (assum.vehicle=="maize flour")],
])

assum[(assum.location_name=="Ethiopia") & (assum.vehicle=="maize flour")]

In [42]:
mult_estimates[(mult_estimates.location_name=="Ethiopia")]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
458,Ethiopia,wheat flour,vitamin b12,Voluntary,percent of population eating fortified vehicle,0.0,multiplicative
459,Ethiopia,wheat flour,vitamin b1,Voluntary,percent of population eating fortified vehicle,0.0,multiplicative
461,Ethiopia,oil,vitamin d,Voluntary,percent of population eating fortified vehicle,0.0,multiplicative


## Nigeria

In [34]:
nigeria = check_one_country("Nigeria")

nigeria.value_description.unique()

array(['percent of population eating fortified vehicle',
       'percent of population eating industrially produced vehicle',
       'percent of population eating vehicle'], dtype=object)

In [35]:
nigeria[(nigeria.vehicle=="maize flour")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
98,,Nigeria,,lagos,maize flour,percent of population eating fortified vehicle,folic acid,0.2,0.0,0.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
99,,Nigeria,,lagos,maize flour,percent of population eating fortified vehicle,vitamin a,0.2,0.0,0.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
100,,Nigeria,,lagos,maize flour,percent of population eating fortified vehicle,zinc,0.2,0.0,0.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
127,,Nigeria,,kano,maize flour,percent of population eating fortified vehicle,folic acid,1.7,0.9,2.6,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
128,,Nigeria,,kano,maize flour,percent of population eating fortified vehicle,vitamin a,1.7,0.9,2.6,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
129,,Nigeria,,kano,maize flour,percent of population eating fortified vehicle,zinc,1.7,0.9,2.6,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
97,,Nigeria,,lagos,maize flour,percent of population eating industrially prod...,na,2.9,1.8,4.0,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
109,,Nigeria,,kano,maize flour,percent of population eating industrially prod...,na,11.0,9.0,13.1,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245755,,,
96,,Nigeria,,lagos,maize flour,percent of population eating vehicle,na,12.2,10.0,14.4,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
103,,Nigeria,,kano,maize flour,percent of population eating vehicle,na,77.1,74.4,79.9,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245754,,,


In [29]:
# NOTE(review): `break` outside a loop is a SyntaxError (see this cell's output),
# so "Run All" halts here. Presumably a deliberate stop before the sections
# below, which still reference Kenya -- confirm.
break

SyntaxError: 'break' outside loop (<ipython-input-29-6aaf1f276005>, line 4)

## Kenya

In [None]:
kenya = check_one_country("Kenya")

kenya.value_description.unique()

In [30]:
mult_estimates[(mult_estimates.location_name=="Kenya")]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
511,Kenya,wheat flour,iron,Mandatory,percent of population eating fortified vehicle,33.83575,multiplicative
512,Kenya,wheat flour,zinc,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
513,Kenya,wheat flour,folic acid,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
514,Kenya,wheat flour,vitamin b12,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
515,Kenya,wheat flour,vitamin b1,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
516,Kenya,wheat flour,vitamin a,Mandatory,percent of population eating fortified vehicle,34.703333,multiplicative
518,Kenya,oil,vitamin d,Unknown,percent of population eating fortified vehicle,0.0,multiplicative
519,Kenya,maize flour,iron,Mandatory,percent of population eating fortified vehicle,5.317153,multiplicative
520,Kenya,maize flour,zinc,Mandatory,percent of population eating fortified vehicle,0.0,multiplicative
521,Kenya,maize flour,folic acid,Mandatory,percent of population eating fortified vehicle,0.0,multiplicative


## pop-weight subnationals

In [None]:
df[df.location_name.isin(location_names)].subnational_name.unique()

In [None]:
# 'Kitui' and 'Vihiga' are relevant; 'Ougadougou' and 'Gnagna' are from Burkina Faso, where we don't have to do any subnational weighting

In [None]:
# Location metadata (ids, names, parent ids) from db_queries.
# NOTE(review): this call uses decomp_step "step4" while get_population below
# uses 'step5' -- confirm the mismatch is intended.
loc_metadata = get_locs(location_set_id=35, gbd_round_id=6, decomp_step="step4")

#combine subnational estimates by population-weighting

# NOTE(review): the extraction rows displayed for Nigeria use subnationals
# 'lagos' and 'kano', which are not in this list -- confirm which subnationals
# need weighting for the countries in location_names.
subnats = loc_metadata[loc_metadata.location_name.isin(['Kitui', 'Vihiga', 'Ougadougou', 'Gnagna'])][['location_id','location_name','parent_id']]

subnats = subnats.rename(columns = {
    'location_id':'subnational_id',
    'location_name':'subnational_name'
})

subnat_pop = get_population(age_group_id=22, 
                     location_id=list(subnats.subnational_id),
                     year_id=2017,
                     sex_id=3,
                     gbd_round_id=6, 
                     decomp_step='step5')

subnats = subnats.merge(subnat_pop, left_on = 'subnational_id', right_on = 'location_id')[['subnational_id','subnational_name','population','parent_id']]

# Total population of the listed subnationals that share a parent. Select the
# population column before transforming so the sum is not also attempted on
# the id and name columns.
subnats['pop_denom'] = subnats.groupby('parent_id').population.transform('sum')

# Each subnational's share of that total.
subnats['subnat_pop_weight'] = subnats.population / subnats.pop_denom

In [None]:
subnats

In [None]:
tmp = subset_data['Kenya']

In [None]:
scols = rcols + ['source_link','source_citation','estimation_status','sub_population']

In [None]:
# Fill missing text fields with the 'na' placeholder.
for text_col in ['source_citation', 'source_link', 'estimation_status']:
    tmp.loc[tmp[text_col].isna(), text_col] = 'na'

# Turn the 'na' placeholder in the percentile columns into NaN.
for bound_col in ['value_025_percentile', 'value_975_percentile']:
    tmp.loc[tmp[bound_col] == 'na', bound_col] = np.nan

# Flag every row that shares its rcols values with another row, then attach
# the population weight of the row's subnational (left join on the columns
# the two frames have in common).
tmp['is_dupl'] = tmp.duplicated(subset=rcols, keep=False)
tmp = tmp.merge(subnats[['subnational_name', 'subnat_pop_weight']], how='left')

In [None]:
tmp.subnat_pop_weight = tmp.subnat_pop_weight.astype(float)
tmp.value_mean = tmp.value_mean.astype(float)
tmp.value_025_percentile = tmp.value_025_percentile.astype(float)
tmp.value_975_percentile = tmp.value_975_percentile.astype(float)

In [None]:
# Scale the mean and both percentile bounds of the duplicated rows by their
# subnational population weight; all other rows are left untouched.
dupl_rows = tmp.is_dupl
for weighted_col in ['value_mean', 'value_025_percentile', 'value_975_percentile']:
    tmp.loc[dupl_rows, weighted_col] = (
        tmp.loc[dupl_rows, weighted_col] * tmp.loc[dupl_rows, 'subnat_pop_weight']
    )

In [None]:
tmp.loc[~tmp.is_dupl,'subnational_name'] = 'na'

In [None]:
scols

In [None]:
tmp_a = tmp[~tmp.is_dupl]

In [None]:
tmp_b = tmp[tmp.is_dupl]

In [None]:
tmp_b.data_choice_notes.unique()

In [None]:
rcols

In [None]:
# Sum the weighted values within each scols group, so every row of a group
# carries the group total.
summed_cols = ['value_mean', 'value_025_percentile', 'value_975_percentile']
# tmp_b was taken from tmp with a boolean mask; copy it so the assignment below
# writes to an independent frame and does not trigger SettingWithCopyWarning.
tmp_b = tmp_b.copy()
# One groupby restricted to the three value columns replaces three separate
# groupbys that each transformed every column of the frame.
tmp_b[summed_cols] = tmp_b.groupby(scols)[summed_cols].transform('sum')

In [None]:
tmp_b = tmp_b[['location_name','vehicle','value_description','nutrient','estimation_status','source_link','source_citation','source_year','sub_population','value_mean','value_025_percentile','value_975_percentile','data_choice_notes',]].drop_duplicates()

In [None]:
tmp_b.value_mean = tmp_b.value_mean.astype(float)
tmp_b.value_025_percentile = tmp_b.value_025_percentile.astype(float)
tmp_b.value_975_percentile = tmp_b.value_975_percentile.astype(float)

In [None]:
# Average the value columns over rows that share the keys below.
# The value columns are selected explicitly because a bare .mean() also tries
# to average the text columns, which raises on pandas >= 2.0.
# NOTE(review): estimation_status and sub_population are not group keys, so
# they are absent from the result -- confirm that is intended.
tmp_b = (
    tmp_b
    .groupby(['location_name','vehicle','value_description','nutrient','source_link','source_citation','source_year',])
    [['value_mean', 'value_025_percentile', 'value_975_percentile']]
    .mean()
    .reset_index()
)

In [None]:
tmp_b['data_choice_notes'] = "Excluded central bureau of stats total pop number from 2005 in lieu of Ferguson rural u5 number from 2015. Note 72% of kenyan population is rural according to the world bank: https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS?locations=KE'. Population weighted rural subnationals."

In [None]:
tmp = tmp_b.append(tmp_a)

In [None]:
tmp

In [None]:
subset_data['Kenya'] = tmp

## Check for missingness

In [None]:
all_data = pd.concat(list(subset_data.values()))

In [None]:
all_data.loc[(all_data.location_name=="Kenya") & (all_data.vehicle=="oil"),["value_description",'value_mean','source_citation',"data_choice_notes"]]

In [None]:
all_data.data_choice_notes.unique()

In [None]:
all_data.loc[~(all_data.nutrient.isin(['vitamin a','iron','zinc','folic acid'])),'nutrient'] = 'na'

In [None]:
all_data[rcols + ['value_mean','value_025_percentile','value_975_percentile','sub_population']]

In [None]:
target.loc[target.location_name=="Vietnam","location_name"] = "Viet Nam"

In [None]:
all_data.location_name.unique()

In [None]:
all_data[(all_data.location_name=="Viet Nam")]

In [None]:
check = target.merge(all_data[rcols + ['value_mean']], on = rcols, how = 'left')

In [None]:
check[check.value_mean.isna()]

In [None]:
assert(len(check[check.value_mean.isna()])==0), "there are target loc/vehcile/val/nutrient combos youre missing"

In [None]:
rcols

In [None]:
output = all_data[rcols + ['value_mean','value_025_percentile', 'value_975_percentile','sub_population','estimation_status','source_citation','source_link','data_choice_notes']].sort_values(rcols).set_index(rcols)

In [None]:
##impute all missing CIs

# Clean value_mean and the 2.5th / 97.5th percentile columns the same way:
# swap the 'na' placeholder for NaN, then cast the column to float.
for ci_col in ['value_mean', 'value_025_percentile', 'value_975_percentile']:
    output.loc[output[ci_col] == 'na', ci_col] = np.nan
    output[ci_col] = output[ci_col].astype(float)

In [None]:
output.loc[(output.value_mean > output.value_975_percentile),'value_975_percentile'] = np.nan

In [None]:
output.loc[(output.value_mean < output.value_025_percentile)]

In [None]:
output.loc[(output.value_mean == output.value_025_percentile)]

In [None]:
output['scale_over_mean'] = (output.value_975_percentile - output.value_025_percentile) / output.value_mean

In [None]:
output = output.reset_index()

In [None]:
# Relative CI width per row, excluding rows where the ratio is +inf.
# .copy() makes r an independent frame, so the later in-place overwrite of the
# maize flour rows does not write to a slice of output (SettingWithCopyWarning).
r = output.loc[(output.scale_over_mean!=np.inf),['vehicle','scale_over_mean']].copy()
# .groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'}).reset_index()

In [None]:
r_mean = r.scale_over_mean.mean()

In [None]:
r.loc[r.vehicle=="maize flour",'scale_over_mean'] = r_mean

In [None]:
r = r.groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'}).reset_index()

In [None]:
# add uncertainty
output = output.merge(r, on = 'vehicle', how = 'outer')

In [None]:
#increase uncertainty for regression estimates
output.loc[(output.estimation_status=='regression'),'r'] = output.r * 2

In [None]:
output['lower'] = np.clip(output.value_mean - (output.r * output.value_mean)/2, 0, 100)
output['upper'] = np.clip(output.value_mean + (output.r * output.value_mean)/2, 0, 100)

In [None]:
output

In [None]:
# Rows whose extracted bound is missing or lies on the wrong side of the mean.
bad_lower = (output.value_mean < output.value_025_percentile) | (output.value_025_percentile.isna())
bad_upper = (output.value_mean > output.value_975_percentile) | (output.value_975_percentile.isna())

# Record where each row's interval comes from.
output.loc[bad_lower | bad_upper, 'CI_source'] = "modeling"
output.loc[output.CI_source.isna(), 'CI_source'] = 'extraction'

# Replace the unusable bounds with the modelled ones.
output.loc[bad_lower, 'value_025_percentile'] = output.loc[bad_lower, 'lower']
output.loc[bad_upper, 'value_975_percentile'] = output.loc[bad_upper, 'upper']

# Drop the helper columns and restore the rcols index.
helper_cols = ['r', 'lower', 'upper', 'scale_over_mean']
output = output.drop(columns=helper_cols).set_index(rcols)

output.loc[output.estimation_status.isna(), 'estimation_status'] = 'na'

In [None]:
output

In [None]:
output = output.reset_index()

In [None]:
output.loc[(output.location_name=="Vietnam"),'location_name'] = "Viet Nam"

In [None]:
sort_helper = {
    'percent of population eating fortified vehicle': 'C',
 'percent of population eating industrially produced vehicle': 'B',
 'percent of population eating vehicle': 'A'}

In [None]:
output['sort_helper'] = output.value_description.map(sort_helper)

In [None]:
output = output.sort_values(['location_name','vehicle','sort_helper','value_description','nutrient']).drop(columns='sort_helper')

In [None]:
output[(output.location_name=="Viet Nam") & (output.value_description=="percent of population eating vehicle")]

In [None]:
target

In [None]:
save_path_tmp = '/ihme/homes/beatrixh/repos/scratch/tier3_coverage_data_03_31_2021.csv'
output.to_csv(save_path_tmp, index = False)

In [None]:
# Write the chosen coverage data (with imputed CIs) to the shared outputs folder.
# NOTE(review): the file name says "tier3_locs", but this notebook's header and
# location_names (Ethiopia, Nigeria, India) are tier 1 -- confirm the path
# before running, so that another tier's output is not overwritten.
save_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/population_coverage_data_tier3_locs_3_31_2021.csv'
output.to_csv(save_path, index = False)

In [None]:
output.head()

In [None]:
formatted_output = output.copy()

In [None]:
loc_metadata = get_locs(location_set_id=35, gbd_round_id=6, decomp_step="step4")

loc_ids = loc_metadata.loc[(loc_metadata.location_name.isin(formatted_output.location_name.unique())),['location_id','location_name']]

formatted_output = formatted_output.merge(loc_ids, on = 'location_name', how = 'left')

In [None]:
formatted_usecols = ['location_id','location_name','sub_population',
                     'vehicle','value_description','nutrient','value_mean',
                     'value_025_percentile','value_975_percentile']

formatted_output = formatted_output[formatted_usecols]

In [None]:
# Write the trimmed, model-input version of the coverage data.
# NOTE(review): the file name says "tier3", but this notebook's header and
# location_names (Ethiopia, Nigeria, India) are tier 1 -- confirm the path
# before running.
save_formatted_output_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/population_coverage_tier3_input_data.csv'
formatted_output.to_csv(save_formatted_output_path, index = False)

In [None]:
# prev_tiers_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/lsff_input_coverage_data.csv'
# prev_tiers = pd.read_csv(prev_tiers_path)

# prev_tiers = prev_tiers[~(prev_tiers.location_name.isin(location_names + ['Viet Nam']))]

# all_tiers = prev_tiers.append(formatted_output)

# all_tiers

# all_tiers.to_csv(prev_tiers_path, index = False)

In [None]:
## TODO
# check what's missing
# population-weight subnationals as necessary
# rerun the regressions --- make sure to fix Burkina Faso
# incorporate and format