In [1]:
from db_queries import get_population, get_ids
from db_queries import get_location_metadata as get_locs

In [2]:
import pandas as pd, numpy as np

# LSFF: choose population coverage data by hand for tier 1 countries

## vehicles: Wheat flour, maize flour, oil


## countries: Ethiopia, India, Nigeria

In [3]:
## load legal combos
import pickle
data_prep_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/'

with open(data_prep_dir + 'lsff_vehicle_nutrient_pairs.pickle', 'rb') as handle:
    vehicle_nutrient_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_country_vehicle_pairs.pickle', 'rb') as handle:
    country_vehicle_map = pickle.load(handle)
    
with open(data_prep_dir + 'lsff_vehicle_country_pairs.pickle', 'rb') as handle:
    vehicle_country_map = pickle.load(handle)

In [4]:
nutrients = ['iron','zinc','folic acid','vitamin a']

In [5]:
data_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/extraction_sheet_lsff_03_24_2021.3.csv'
assm_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/inputs/extraction_sheet_lsff_assumed_coverage_03_24_2021.csv'

df = pd.read_csv(data_path)

In [6]:
df.loc[df.nutrient=="folic acid, folate, b9",'nutrient']= 'folic acid'

In [7]:
assum = pd.read_csv(assm_path)

In [8]:
#these don't apply this time
assum.location_name.unique()

array(['Ethiopia', 'Myanmar', 'India'], dtype=object)

In [9]:
assum.loc[assum.nutrient=="folic acid, folate, b9",'nutrient']= 'folic acid'

In [10]:
df['estimation_status'] = 'na'

In [11]:
df['data_choice_notes'] = ""

In [12]:
mult_estimates_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/lsff_data_estimated_03_26_2021.4.csv'

mult_estimates = pd.read_csv(mult_estimates_path)

In [13]:
#reformat
mult_estimates.loc[(mult_estimates.B_estimate!=1.0),'B'] = np.nan
mult_estimates.loc[(mult_estimates.C_estimate!=1.0),'C'] = np.nan

mult_estimates = pd.melt(mult_estimates,
                         id_vars = ['location_name','vehicle','nutrient','standard'],
                         value_vars = ['B','C'], var_name = 'value_description', value_name = 'value_mean').dropna()

mult_estimates.loc[(mult_estimates.value_description=="B"),'nutrient'] = 'NA'

mult_estimates = mult_estimates.drop_duplicates()

mult_estimates.value_description = mult_estimates.value_description.map({
    'B':'percent of population eating industrially produced vehicle',
    'C':'percent of population eating fortified vehicle'
})

In [14]:
mult_estimates['estimation_status'] = 'multiplicative'

In [15]:
def prep_reg_estimates(path):
    draws = [f'draw_{i}' for i in range(500)]

    df = pd.read_csv(path)
    df = df.groupby(['location_name','vehicle']).mean().reset_index()
    df['value_mean'] = df[draws].mean(axis=1)
    df['value_025_percentile'] = df[draws].quantile(.025, axis=1)
    df['value_975_percentile'] = df[draws].quantile(.975, axis=1)

    return df[['location_name','vehicle','value_mean']]

In [16]:
output_dir = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/'

reg_fort_oil_path = output_dir + 'pct_eating_fortified_oil_regression_estimates_3_22_2021.csv'
reg_fort_wheat_path = output_dir + 'pct_eating_fortified_wheat_regression_estimates_3_22_2021.csv'
reg_fort_maize_path = output_dir + 'pct_eating_fortified_maize_regression_estimates_3_22_2021.csv'

est_fortified = pd.concat([prep_reg_estimates(path) for path in [reg_fort_oil_path,reg_fort_wheat_path,reg_fort_maize_path]])
est_fortified['value_description'] = "percent of population eating fortified vehicle"
est_fortified['estimation_status'] = "regression"

In [17]:
vn_pairs = pd.DataFrame([(v,n) for v in ['oil','wheat flour','maize flour'] for n in vehicle_nutrient_map[v]],
            columns=['vehicle','nutrient'])

In [18]:
est_fortified = est_fortified.merge(vn_pairs, on = 'vehicle', how = 'outer')[['location_name','vehicle','value_description','nutrient','value_mean','estimation_status']]

In [19]:
ind_prod_oil_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_fortifiable_oil_regression_estimates_3_26_2021.csv'
ind_prod_wheat_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_fortifiable_wheat_regression_estimates_3_26_2021.csv'
ind_prod_maize_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/pct_eating_fortifiable_maize_regression_estimates_3_26_2021.csv'

est_fortifiable = pd.concat([prep_reg_estimates(path) for path in [ind_prod_oil_path,ind_prod_wheat_path,ind_prod_maize_path]])
est_fortifiable['value_description'] = "percent of population eating industrially produced vehicle"
est_fortifiable['estimation_status'] = "regression"
est_fortifiable['nutrient'] = "na"

In [20]:
eating_oil_path = output_dir + 'pct_eating_oil_regression_estimates_3_23_2021.csv'
eating_wheat_path = output_dir + 'pct_eating_wheat_regression_estimates_3_22_2021.csv'
eating_maize_path = output_dir + 'pct_eating_maize_regression_estimates_3_22_2021.csv'

est_eating = pd.concat([prep_reg_estimates(path) for path in [eating_oil_path,eating_wheat_path,eating_maize_path]])
est_eating['value_description'] = "percent of population eating vehicle"
est_eating['estimation_status'] = "regression"
est_eating['nutrient'] = "na"

In [21]:
reg_estimates = pd.concat([est_eating, est_fortifiable, est_fortified])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [22]:
location_names = ['Ethiopia','Nigeria','India']
vehicles = ['maize flour','wheat flour','oil']
nutrients = ['folic acid','iron','zinc','vitamin a']

In [23]:
# these are the vehicles per country we need
target_high_level = pd.DataFrame([(loc,v) for loc in location_names for v in country_vehicle_map[loc]],
            columns=['location_name','vehicle']).sort_values(['location_name','vehicle'])

target_high_level = target_high_level[target_high_level.vehicle.isin(vehicles)].set_index(['location_name','vehicle'])

target_high_level

location_name,vehicle
Ethiopia,maize flour
Ethiopia,oil
Ethiopia,wheat flour
India,oil
India,wheat flour
Nigeria,maize flour
Nigeria,oil
Nigeria,wheat flour


In [24]:
target_a = pd.DataFrame([(loc,vehicle,nutrient,'percent of population eating fortified vehicle') for loc in location_names
                       for vehicle in country_vehicle_map[loc]
                      for nutrient in vehicle_nutrient_map[vehicle]],
            columns=['location_name','vehicle','nutrient','value_description']).sort_values(['location_name','vehicle','nutrient'])
target_a = target_a[(target_a.nutrient.isin(nutrients))]

target_b = pd.DataFrame([(loc,vehicle,'na',val) for loc in location_names
                       for vehicle in country_vehicle_map[loc]
                      for val in ['percent of population eating industrially produced vehicle',
       'percent of population eating vehicle']],
            columns=['location_name','vehicle','nutrient','value_description'])

In [25]:
sortvars = ['location_name','vehicle','value_description','nutrient']
target = target_a.append(target_b)
target = target[(target.vehicle.isin(vehicles)) & (target.nutrient.isin(nutrients + ['na']))].sort_values(sortvars).set_index(sortvars)

In [26]:
target = target.reset_index()
rcols = target.columns.tolist()

In [27]:
check_cols = ['location_id','location_name','urbanicity','subnational_name','vehicle','value_description','nutrient','value_mean','value_025_percentile',
       'value_975_percentile','sub_population','source_year','notes','source_citation','source_link','inclusion_justification','included','data_choice_notes']

def filter_data(country, vehicle, val):    
    output = df.loc[(df.location_name==country)
           & (df.vehicle==vehicle)
           & (df.value_description==val)
           & (df.value_mean.notna()),check_cols]
    
    return output

In [28]:
def check_one_country(country):
    vehicles = ['oil', 'wheat flour', 'salt', 'maize flour', 'rice', 'bouillon']
    values_gold = ['percent of population eating fortified vehicle',
               'percent of population eating industrially produced vehicle',
               'percent of population eating vehicle']
    return pd.concat([filter_data(country, vehicle, val) for vehicle in vehicles for val in values_gold])

In [29]:
usecols = ['location_id','location_name','subnational_name','vehicle','value_description','nutrient','value_mean', 'value_025_percentile',
       'value_975_percentile']
subset_data = {}

In [30]:
for i in location_names:
    subset_data[i] = pd.DataFrame()

In [31]:
location_names

['Ethiopia', 'Nigeria', 'India']

## Ethiopia

In [32]:
ethiopia = check_one_country("Ethiopia")

ethiopia[(ethiopia.vehicle.isin(vehicles))].groupby(['vehicle','value_description']).mean()

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0_level_0,Unnamed: 1_level_0,location_id
vehicle,value_description,Unnamed: 2_level_1
oil,percent of population eating industrially produced vehicle,
oil,percent of population eating vehicle,
wheat flour,percent of population eating industrially produced vehicle,
wheat flour,percent of population eating vehicle,


In [33]:
ethiopia.columns

Index(['location_id', 'location_name', 'urbanicity', 'subnational_name',
       'vehicle', 'value_description', 'nutrient', 'value_mean',
       'value_025_percentile', 'value_975_percentile', 'sub_population',
       'source_year', 'notes', 'source_citation', 'source_link',
       'inclusion_justification', 'included', 'data_choice_notes'],
      dtype='object')

In [34]:
ethiopia.loc[(ethiopia.vehicle=="oil") & (ethiopia.value_description=="percent of population eating industrially produced vehicle")][['location_name','urbanicity','sub_population','subnational_name','value_mean','source_year','source_citation']]

Unnamed: 0,location_name,urbanicity,sub_population,subnational_name,value_mean,source_year,source_citation
170,Ethiopia,rural,total population,na,19,2000,Government of the Federal Democratic Republic ...
171,Ethiopia,urban,total population,na,92,2000,Government of the Federal Democratic Republic ...
188,Ethiopia,,under-5,na,70,2015,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie..."
189,Ethiopia,,women of reproductive age,na,50,2015,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie..."
594,Ethiopia,mixed/both,total population,na,55,2000,Government of the Federal Democratic Republic ...


In [35]:
ethiopia.loc[(ethiopia.vehicle=="oil") &
             (ethiopia.value_description=="percent of population eating industrially produced vehicle") &
            (ethiopia.source_year=="2015"),"data_choice_notes"] = "kept most recent year / representative populations"

subset_data['Ethiopia'] = subset_data['Ethiopia'].append(
    ethiopia.loc[(ethiopia.vehicle=="oil") &
             (ethiopia.value_description=="percent of population eating industrially produced vehicle") &
            (ethiopia.source_year=="2015")]
)

ethiopia.loc[(ethiopia.vehicle=="oil") &
             (ethiopia.value_description=="percent of population eating industrially produced vehicle") &
            (ethiopia.source_year=="2015")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
188,,Ethiopia,,na,oil,percent of population eating industrially prod...,na,70,,,under-5,2015,no uncertainty and no fortification status (ca...,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie...",http://www.journalejnfs.com/index.php/EJNFS/ar...,,,kept most recent year / representative populat...
189,,Ethiopia,,na,oil,percent of population eating industrially prod...,na,50,,,women of reproductive age,2015,no uncertainty and no fortification status (ca...,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie...",http://www.journalejnfs.com/index.php/EJNFS/ar...,,,kept most recent year / representative populat...


In [36]:
ethiopia.loc[(ethiopia.vehicle=="oil") & (ethiopia.value_description=="percent of population eating vehicle")][['location_name','urbanicity','sub_population','subnational_name','value_mean','source_year','source_citation','source_link']]

Unnamed: 0,location_name,urbanicity,sub_population,subnational_name,value_mean,source_year,source_citation,source_link
166,Ethiopia,mixed/both,,na,55.0,,,https://vivarium-research.readthedocs.io/en/la...
190,Ethiopia,,women of reproductive age,southern Ethiopia,3.7,2011.0,1A 24-h recall does not provide a valid estima...,https://www-sciencedirect-com.offcampus.lib.wa...


In [37]:
for i in ethiopia.loc[(ethiopia.vehicle=="oil") & (ethiopia.value_description=="percent of population eating vehicle")][['location_name','urbanicity','sub_population','subnational_name','value_mean','source_year','source_citation','source_link']].source_citation:
    print(i)

nan
1A 24-h recall does not provide a valid estimate of absolute nutrient intakes for rural women in southern Ethiopia. Nutrition 2011; 27: 919–24.


In [38]:
## not going to include this

In [39]:
ethiopia.loc[(ethiopia.vehicle=="wheat flour") & (ethiopia.value_description=="percent of population eating industrially produced vehicle")][['location_name','urbanicity','sub_population','subnational_name','value_mean','source_year','source_citation']]

Unnamed: 0,location_name,urbanicity,sub_population,subnational_name,value_mean,source_year,source_citation
173,Ethiopia,mixed/both,total population,na,28,2000,Government of the Federal Democratic Republic ...
175,Ethiopia,rural,total population,na,18,2000,Government of the Federal Democratic Republic ...
176,Ethiopia,urban,total population,na,89,2000,Government of the Federal Democratic Republic ...
178,Ethiopia,,under-5,na,27,2015,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie..."
179,Ethiopia,,women of reproductive age,na,20,2015,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie..."


In [40]:
subset_data['Ethiopia'] = subset_data['Ethiopia'].append(
    ethiopia.loc[(ethiopia.vehicle=="wheat flour") & 
             (ethiopia.value_description=="percent of population eating industrially produced vehicle") & 
             (ethiopia.source_year=="2015")]   
)

ethiopia.loc[(ethiopia.vehicle=="wheat flour") & 
             (ethiopia.value_description=="percent of population eating industrially produced vehicle") & 
             (ethiopia.source_year=="2015")]   

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
178,,Ethiopia,,na,wheat flour,percent of population eating industrially prod...,na,27,na,na,under-5,2015,no uncertainty and no fortification status (ca...,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie...",http://www.journalejnfs.com/index.php/EJNFS/ar...,,,
179,,Ethiopia,,na,wheat flour,percent of population eating industrially prod...,na,20,na,na,women of reproductive age,2015,no uncertainty and no fortification status (ca...,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie...",http://www.journalejnfs.com/index.php/EJNFS/ar...,,,


In [41]:
for i in ethiopia.loc[(ethiopia.vehicle=="wheat flour") & 
             (ethiopia.value_description=="percent of population eating vehicle")].notes:
    print(i)

There was some issue refinding the original paper these were extracted from, would be worth someone double checking the concept model


In [42]:
mult_estimates[(mult_estimates.location_name=="Ethiopia")]

Unnamed: 0,location_name,vehicle,nutrient,standard,value_description,value_mean,estimation_status
458,Ethiopia,wheat flour,vitamin b12,Voluntary,percent of population eating fortified vehicle,0.0,multiplicative
459,Ethiopia,wheat flour,vitamin b1,Voluntary,percent of population eating fortified vehicle,0.0,multiplicative
461,Ethiopia,oil,vitamin d,Voluntary,percent of population eating fortified vehicle,0.0,multiplicative


In [43]:
subset_data['Ethiopia'][['location_name','vehicle','value_description','value_mean']]

Unnamed: 0,location_name,vehicle,value_description,value_mean
188,Ethiopia,oil,percent of population eating industrially prod...,70
189,Ethiopia,oil,percent of population eating industrially prod...,50
178,Ethiopia,wheat flour,percent of population eating industrially prod...,27
179,Ethiopia,wheat flour,percent of population eating industrially prod...,20


## Nigeria

In [44]:
nigeria = check_one_country("Nigeria")

nigeria[(nigeria.vehicle.isin(vehicles))].groupby(['vehicle','value_description']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id
vehicle,value_description,Unnamed: 2_level_1
maize flour,percent of population eating fortified vehicle,
maize flour,percent of population eating industrially produced vehicle,
maize flour,percent of population eating vehicle,
oil,percent of population eating fortified vehicle,
oil,percent of population eating industrially produced vehicle,
oil,percent of population eating vehicle,
wheat flour,percent of population eating fortified vehicle,
wheat flour,percent of population eating industrially produced vehicle,
wheat flour,percent of population eating vehicle,


In [45]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="maize flour") & (nigeria.value_description=="percent of population eating fortified vehicle")]
)

nigeria[(nigeria.vehicle=="maize flour") & (nigeria.value_description=="percent of population eating fortified vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
98,,Nigeria,,lagos,maize flour,percent of population eating fortified vehicle,folic acid,0.2,0.0,0.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
99,,Nigeria,,lagos,maize flour,percent of population eating fortified vehicle,vitamin a,0.2,0.0,0.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
100,,Nigeria,,lagos,maize flour,percent of population eating fortified vehicle,zinc,0.2,0.0,0.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
127,,Nigeria,,kano,maize flour,percent of population eating fortified vehicle,folic acid,1.7,0.9,2.6,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
128,,Nigeria,,kano,maize flour,percent of population eating fortified vehicle,vitamin a,1.7,0.9,2.6,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
129,,Nigeria,,kano,maize flour,percent of population eating fortified vehicle,zinc,1.7,0.9,2.6,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,


In [46]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="maize flour") &
            (nigeria.value_description=="percent of population eating industrially produced vehicle")]
)

nigeria[(nigeria.vehicle=="maize flour") &
        (nigeria.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
97,,Nigeria,,lagos,maize flour,percent of population eating industrially prod...,na,2.9,1.8,4.0,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
109,,Nigeria,,kano,maize flour,percent of population eating industrially prod...,na,11.0,9.0,13.1,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245755,,,


In [47]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="maize flour") &
            (nigeria.value_description=="percent of population eating vehicle")]
)

nigeria[(nigeria.vehicle=="maize flour") &
        (nigeria.value_description=="percent of population eating vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
96,,Nigeria,,lagos,maize flour,percent of population eating vehicle,na,12.2,10.0,14.4,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
103,,Nigeria,,kano,maize flour,percent of population eating vehicle,na,77.1,74.4,79.9,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245754,,,


In [48]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="oil") &
        (nigeria.value_description=="percent of population eating fortified vehicle") &
       (nigeria.nutrient.isin(nutrients))]
)

nigeria[(nigeria.vehicle=="oil") &
        (nigeria.value_description=="percent of population eating fortified vehicle") &
       (nigeria.nutrient.isin(nutrients))]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
81,,Nigeria,,lagos,oil,percent of population eating fortified vehicle,vitamin a,7.2,5.5,8.9,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
111,,Nigeria,,kano,oil,percent of population eating fortified vehicle,vitamin a,7.6,5.9,9.4,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,


In [49]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="oil") &
        (nigeria.value_description=="percent of population eating industrially produced vehicle")]
)

nigeria[(nigeria.vehicle=="oil") &
        (nigeria.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
83,,Nigeria,,lagos,oil,percent of population eating industrially prod...,na,22.7,19.9,25.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
105,,Nigeria,,kano,oil,percent of population eating industrially prod...,na,35.9,32.7,39.1,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245755,,,


In [50]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="oil") &
        (nigeria.value_description=="percent of population eating vehicle")]
)

nigeria[(nigeria.vehicle=="oil") &
        (nigeria.value_description=="percent of population eating vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
84,,Nigeria,,lagos,oil,percent of population eating vehicle,na,98.6,97.8,99.3,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
101,,Nigeria,,kano,oil,percent of population eating vehicle,na,98.4,97.6,99.2,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245754,,,


In [51]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="wheat flour") &
        (nigeria.value_description=="percent of population eating fortified vehicle") &
       (nigeria.nutrient.isin(nutrients))]
)

nigeria[(nigeria.vehicle=="wheat flour") &
        (nigeria.value_description=="percent of population eating fortified vehicle") &
       (nigeria.nutrient.isin(nutrients))]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
50,,Nigeria,,kano,wheat flour,percent of population eating fortified vehicle,iron,22.7,20.0,25.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
51,,Nigeria,,kano,wheat flour,percent of population eating fortified vehicle,folic acid,22.7,20.0,25.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
82,,Nigeria,,lagos,wheat flour,percent of population eating fortified vehicle,zinc,5.4,3.8,6.9,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
85,,Nigeria,,lagos,wheat flour,percent of population eating fortified vehicle,iron,5.4,3.8,6.9,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
86,,Nigeria,,lagos,wheat flour,percent of population eating fortified vehicle,folic acid,5.4,3.8,6.9,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
92,,Nigeria,,lagos,wheat flour,percent of population eating fortified vehicle,vitamin a,5.4,3.8,6.9,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
122,,Nigeria,,kano,wheat flour,percent of population eating fortified vehicle,vitamin a,22.7,20.0,25.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
154,,Nigeria,,kano,wheat flour,percent of population eating fortified vehicle,zinc,22.7,20.0,25.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,


In [52]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="wheat flour") &
        (nigeria.value_description=="percent of population eating industrially produced vehicle")]
)

nigeria[(nigeria.vehicle=="wheat flour") &
        (nigeria.value_description=="percent of population eating industrially produced vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
94,,Nigeria,,lagos,wheat flour,percent of population eating industrially prod...,na,13.8,11.5,16.1,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
106,,Nigeria,,kano,wheat flour,percent of population eating industrially prod...,na,83.8,81.4,86.2,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245755,,,


In [53]:
subset_data['Nigeria'] = subset_data['Nigeria'].append(
    nigeria[(nigeria.vehicle=="wheat flour") &
        (nigeria.value_description=="percent of population eating vehicle")]
)

nigeria[(nigeria.vehicle=="wheat flour") &
        (nigeria.value_description=="percent of population eating vehicle")]

Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
95,,Nigeria,,lagos,wheat flour,percent of population eating vehicle,na,14.2,11.8,16.5,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
102,,Nigeria,,kano,wheat flour,percent of population eating vehicle,na,83.9,81.5,86.3,total population,2015,"Raw coverage of wheat flour, maize flour, and ...","Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245754,,,


## India

In [54]:
india = check_one_country("India")

india[(india.vehicle.isin(vehicles))].groupby(['vehicle','value_description']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,location_id
vehicle,value_description,Unnamed: 2_level_1
oil,percent of population eating fortified vehicle,
oil,percent of population eating industrially produced vehicle,
oil,percent of population eating vehicle,
wheat flour,percent of population eating fortified vehicle,
wheat flour,percent of population eating industrially produced vehicle,
wheat flour,percent of population eating vehicle,


In [55]:
subset_data['India'] = subset_data['India'].append(
    india[(india.vehicle=="oil") &
     (india.value_description=="percent of population eating fortified vehicle") &
     (india.nutrient.isin(nutrients))].iloc[0]
)
india[(india.vehicle=="oil") &
     (india.value_description=="percent of population eating fortified vehicle") &
     (india.nutrient.isin(nutrients))].iloc[0]

location_id                                                              NaN
location_name                                                          India
urbanicity                                                        mixed/both
subnational_name                                                   Rajasthan
vehicle                                                                  oil
value_description             percent of population eating fortified vehicle
nutrient                                                           vitamin a
value_mean                                                              24.3
value_025_percentile                                                    21.1
value_975_percentile                                                    27.9
sub_population                                                   0-24 months
source_year                                                        2013-2014
notes                                                                    NaN

In [56]:
subset_data['India'] = subset_data['India'].append(
    india[(india.vehicle=="oil") &
     (india.value_description=="percent of population eating industrially produced vehicle")].iloc[0]
)

india[(india.vehicle=="oil") &
     (india.value_description=="percent of population eating industrially produced vehicle")].iloc[0]

location_id                                                              NaN
location_name                                                          India
urbanicity                                                        mixed/both
subnational_name                                                   Rajasthan
vehicle                                                                  oil
value_description          percent of population eating industrially prod...
nutrient                                                                  na
value_mean                                                              89.4
value_025_percentile                                                      87
value_975_percentile                                                    91.8
sub_population                                                   0-24 months
source_year                                                             2017
notes                                                                    NaN

In [57]:
india.loc[(india.vehicle=="oil") &
     (india.value_description=="percent of population eating vehicle"),'value_025_percentile'] = 99
india.loc[(india.vehicle=="oil") &
     (india.value_description=="percent of population eating vehicle"),'value_mean'] = 99.999

In [58]:
subset_data['India'] = subset_data['India'].append(
    india[(india.vehicle=="oil") &
     (india.value_description=="percent of population eating vehicle")].iloc[1]
)

india[(india.vehicle=="oil") &
     (india.value_description=="percent of population eating vehicle")].iloc[1]

location_id                                                              NaN
location_name                                                          India
urbanicity                                                        mixed/both
subnational_name                                                   Rajasthan
vehicle                                                                  oil
value_description                       percent of population eating vehicle
nutrient                                                                  na
value_mean                                                            99.999
value_025_percentile                                                      99
value_975_percentile                                                     100
sub_population                                                   0-24 months
source_year                                                        2013-2014
notes                      no confidence interval given - percentage was ...

In [59]:
subset_data['India'] = subset_data['India'].append(
    india[(india.vehicle=="wheat flour") &
     (india.value_description=="percent of population eating fortified vehicle") &
     (india.nutrient.isin(nutrients))].iloc[:2]
)
india[(india.vehicle=="wheat flour") &
     (india.value_description=="percent of population eating fortified vehicle") &
     (india.nutrient.isin(nutrients))].iloc[:2]

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Unnamed: 0,location_id,location_name,urbanicity,subnational_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
8,,India,mixed/both,Rajasthan,wheat flour,percent of population eating fortified vehicle,iron,6.3,4.8,7.9,0-24 months,2013-2014,atta wheat flour,"Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,
40,,India,mixed/both,Rajasthan,wheat flour,percent of population eating fortified vehicle,folic acid,6.3,4.8,7.9,0-24 months,2013-2014,atta wheat flour,"Grant J Aaron, Valerie M Friesen, Svenja Jungj...",https://doi.org/10.3945/jn.116.245753,,,


In [60]:
subset_data['India'] = subset_data['India'].append(
    india[(india.vehicle=="wheat flour") &
     (india.value_description=="percent of population eating industrially produced vehicle")].iloc[0]
)


india[(india.vehicle=="wheat flour") &
     (india.value_description=="percent of population eating industrially produced vehicle")].iloc[0]

location_id                                                              NaN
location_name                                                          India
urbanicity                                                        mixed/both
subnational_name                                                   Rajasthan
vehicle                                                          wheat flour
value_description          percent of population eating industrially prod...
nutrient                                                                  na
value_mean                                                               7.1
value_025_percentile                                                     5.6
value_975_percentile                                                     9.1
sub_population                                                   0-24 months
source_year                                                        2013-2014
notes                                                       atta wheat flour

In [61]:
subset_data['India'] = subset_data['India'].append(
    india[(india.vehicle=="wheat flour") &
     (india.value_description=="percent of population eating vehicle")].iloc[0]
)


india[(india.vehicle=="wheat flour") &
     (india.value_description=="percent of population eating vehicle")].iloc[0]

location_id                                                              NaN
location_name                                                          India
urbanicity                                                        mixed/both
subnational_name                                                   Rajasthan
vehicle                                                          wheat flour
value_description                       percent of population eating vehicle
nutrient                                                                  na
value_mean                                                              83.2
value_025_percentile                                                    79.5
value_975_percentile                                                    86.5
sub_population                                                   0-24 months
source_year                                                        2013-2014
notes                                                       atta wheat flour

## pop-weight subnationals

In [62]:
#these are the subnats we have to weight

checkout = pd.concat(list(subset_data.values()))
checkout.loc[(checkout.subnational_name.notna()) & (checkout.subnational_name!='na'),
         ['location_name','urbanicity','subnational_name','source_link']].drop_duplicates()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,location_name,urbanicity,subnational_name,source_link
98,Nigeria,,lagos,https://doi.org/10.3945/jn.116.245753
127,Nigeria,,kano,https://doi.org/10.3945/jn.116.245753
109,Nigeria,,kano,https://doi.org/10.3945/jn.116.245755
103,Nigeria,,kano,https://doi.org/10.3945/jn.116.245754
0,India,mixed/both,Rajasthan,https://doi.org/10.3945/jn.116.245753


In [63]:
loc_metadata = get_locs(location_set_id=35, gbd_round_id=6, decomp_step="step4")
#combine subnational estimates by population-weighting
subnats = loc_metadata[['location_id','location_name','parent_id']]
subnats = subnats.rename(columns = {
    'location_id':'subnational_id',
    'location_name':'subnational_name'
})

In [64]:
subnat_pop = get_population(age_group_id=22, 
                     location_id=list(subnats.subnational_id),
                     year_id=2017,
                     sex_id=3,
                     gbd_round_id=6, 
                     decomp_step='step5')

In [65]:
subnats = subnats.merge(subnat_pop, left_on = 'subnational_id', right_on = 'location_id')[['subnational_id','subnational_name','population','parent_id']]

In [66]:
subnats = subnats[(subnats.subnational_name.isin(['Kano','Lagos']))]

In [67]:
subnats['pop_denom'] = subnats.groupby('parent_id').transform('sum').population
subnats['subnat_pop_weight'] = subnats.population / subnats.pop_denom

In [68]:
subnats

Unnamed: 0,subnational_id,subnational_name,population,parent_id,pop_denom,subnat_pop_weight
1060,25337,Kano,15111400.0,214,21690100.0,0.696694
1065,25342,Lagos,6578742.0,214,21690100.0,0.303306


In [69]:
tmp = subset_data['Nigeria']

In [70]:
tmp.subnational_name.unique()

array(['lagos', 'kano'], dtype=object)

In [71]:
tmp['subnational_name'] = tmp.subnational_name.map({i:i.capitalize() for i in tmp.subnational_name})

In [72]:
tmp.groupby(['location_name','vehicle','value_description','nutrient']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,location_id,urbanicity,subnational_name,value_mean,value_025_percentile,value_975_percentile,sub_population,source_year,notes,source_citation,source_link,inclusion_justification,included,data_choice_notes
location_name,vehicle,value_description,nutrient,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Nigeria,maize flour,percent of population eating fortified vehicle,folic acid,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,maize flour,percent of population eating fortified vehicle,vitamin a,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,maize flour,percent of population eating fortified vehicle,zinc,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,maize flour,percent of population eating industrially produced vehicle,na,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,maize flour,percent of population eating vehicle,na,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,oil,percent of population eating fortified vehicle,vitamin a,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,oil,percent of population eating industrially produced vehicle,na,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,oil,percent of population eating vehicle,na,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,wheat flour,percent of population eating fortified vehicle,folic acid,0,0,2,2,2,2,2,2,2,2,2,0,0,2
Nigeria,wheat flour,percent of population eating fortified vehicle,iron,0,0,2,2,2,2,2,2,2,2,2,0,0,2


In [73]:
tmp.loc[tmp.source_citation.isna(),'source_citation'] = 'na'
tmp.loc[tmp.source_link.isna(),'source_link'] = 'na'
# tmp.loc[tmp.estimation_status.isna(),'estimation_status'] = 'na'

tmp.loc[tmp.value_025_percentile=='na','value_025_percentile'] = np.nan
tmp.loc[tmp.value_975_percentile=='na','value_975_percentile'] = np.nan

tmp['is_dupl'] = tmp.duplicated(subset = rcols, keep = False)

In [74]:
tmp = tmp.merge(subnats[['subnational_name','subnat_pop_weight']], on = 'subnational_name', how = 'left')

In [75]:
tmp.subnat_pop_weight = tmp.subnat_pop_weight.astype(float)
tmp.value_mean = tmp.value_mean.astype(float)
tmp.value_025_percentile = tmp.value_025_percentile.astype(float)
tmp.value_975_percentile = tmp.value_975_percentile.astype(float)

tmp.loc[tmp.is_dupl,'value_mean'] = tmp.loc[tmp.is_dupl].value_mean * tmp.loc[tmp.is_dupl].subnat_pop_weight
tmp.loc[tmp.is_dupl,'value_025_percentile'] = tmp.value_025_percentile * tmp.subnat_pop_weight
tmp.loc[tmp.is_dupl,'value_975_percentile'] = tmp.value_975_percentile * tmp.subnat_pop_weight

tmp.loc[~tmp.is_dupl,'subnational_name'] = 'na'

In [76]:
tmp.value_mean = tmp.groupby(rcols).transform('sum').value_mean
tmp.value_025_percentile = tmp.groupby(rcols).transform('sum').value_025_percentile
tmp.value_975_percentile = tmp.groupby(rcols).transform('sum').value_975_percentile

In [77]:
tmp = tmp[['location_name','vehicle','value_description','nutrient','source_year','sub_population','value_mean','value_025_percentile','value_975_percentile']].drop_duplicates()

In [78]:
tmp

Unnamed: 0,location_name,vehicle,value_description,nutrient,source_year,sub_population,value_mean,value_025_percentile,value_975_percentile
0,Nigeria,maize flour,percent of population eating fortified vehicle,folic acid,2015,total population,1.24504,0.627025,1.96306
1,Nigeria,maize flour,percent of population eating fortified vehicle,vitamin a,2015,total population,1.24504,0.627025,1.96306
2,Nigeria,maize flour,percent of population eating fortified vehicle,zinc,2015,total population,1.24504,0.627025,1.96306
6,Nigeria,maize flour,percent of population eating industrially prod...,na,2015,total population,8.54322,6.8162,10.3399
8,Nigeria,maize flour,percent of population eating vehicle,na,2015,total population,57.4155,54.8671,60.0335
10,Nigeria,oil,percent of population eating fortified vehicle,vitamin a,2015,total population,7.47868,5.77868,9.24835
12,Nigeria,oil,percent of population eating industrially prod...,na,2015,total population,31.8964,28.8177,34.975
14,Nigeria,oil,percent of population eating vehicle,na,2015,total population,98.4607,97.6607,99.2303
16,Nigeria,wheat flour,percent of population eating fortified vehicle,iron,2015,total population,17.4528,15.0864,19.8585
17,Nigeria,wheat flour,percent of population eating fortified vehicle,folic acid,2015,total population,17.4528,15.0864,19.8585


In [79]:
subset_data['Nigeria'] = tmp

## Check for missingness

In [80]:
all_data = pd.concat(list(subset_data.values()))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [81]:
all_data.data_choice_notes.unique()

array(['kept most recent year / representative populations', '', nan],
      dtype=object)

In [82]:
all_data.loc[~(all_data.nutrient.isin(['vitamin a','iron','zinc','folic acid'])),'nutrient'] = 'na'

In [83]:
all_data[rcols + ['value_mean','value_025_percentile','value_975_percentile','sub_population']]

Unnamed: 0,location_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population
188,Ethiopia,oil,percent of population eating industrially prod...,na,70.0,,,under-5
189,Ethiopia,oil,percent of population eating industrially prod...,na,50.0,,,women of reproductive age
178,Ethiopia,wheat flour,percent of population eating industrially prod...,na,27.0,na,na,under-5
179,Ethiopia,wheat flour,percent of population eating industrially prod...,na,20.0,na,na,women of reproductive age
0,Nigeria,maize flour,percent of population eating fortified vehicle,folic acid,1.24504,0.627025,1.96306,total population
1,Nigeria,maize flour,percent of population eating fortified vehicle,vitamin a,1.24504,0.627025,1.96306,total population
2,Nigeria,maize flour,percent of population eating fortified vehicle,zinc,1.24504,0.627025,1.96306,total population
6,Nigeria,maize flour,percent of population eating industrially prod...,na,8.54322,6.8162,10.3399,total population
8,Nigeria,maize flour,percent of population eating vehicle,na,57.4155,54.8671,60.0335,total population
10,Nigeria,oil,percent of population eating fortified vehicle,vitamin a,7.47868,5.77868,9.24835,total population


In [84]:
check = target.merge(all_data[rcols + ['value_mean']], on = rcols, how = 'left')

In [85]:
check

Unnamed: 0,location_name,vehicle,value_description,nutrient,value_mean
0,Ethiopia,maize flour,percent of population eating fortified vehicle,folic acid,
1,Ethiopia,maize flour,percent of population eating fortified vehicle,iron,
2,Ethiopia,maize flour,percent of population eating fortified vehicle,vitamin a,
3,Ethiopia,maize flour,percent of population eating fortified vehicle,zinc,
4,Ethiopia,maize flour,percent of population eating industrially prod...,na,
5,Ethiopia,maize flour,percent of population eating vehicle,na,
6,Ethiopia,oil,percent of population eating fortified vehicle,vitamin a,
7,Ethiopia,oil,percent of population eating industrially prod...,na,70.0
8,Ethiopia,oil,percent of population eating industrially prod...,na,50.0
9,Ethiopia,oil,percent of population eating vehicle,na,


In [86]:
need_reg = check.loc[check.value_mean.isna(),['value_description','vehicle','location_name','nutrient']]

In [87]:
need_reg

Unnamed: 0,value_description,vehicle,location_name,nutrient
0,percent of population eating fortified vehicle,maize flour,Ethiopia,folic acid
1,percent of population eating fortified vehicle,maize flour,Ethiopia,iron
2,percent of population eating fortified vehicle,maize flour,Ethiopia,vitamin a
3,percent of population eating fortified vehicle,maize flour,Ethiopia,zinc
4,percent of population eating industrially prod...,maize flour,Ethiopia,na
5,percent of population eating vehicle,maize flour,Ethiopia,na
6,percent of population eating fortified vehicle,oil,Ethiopia,vitamin a
9,percent of population eating vehicle,oil,Ethiopia,na
10,percent of population eating fortified vehicle,wheat flour,Ethiopia,folic acid
11,percent of population eating fortified vehicle,wheat flour,Ethiopia,iron


In [88]:
need_reg = need_reg.merge(reg_estimates, on = ['value_description','vehicle','location_name','nutrient'], how = 'left')

In [89]:
need_reg

Unnamed: 0,value_description,vehicle,location_name,nutrient,estimation_status,value_mean
0,percent of population eating fortified vehicle,maize flour,Ethiopia,folic acid,regression,0.0
1,percent of population eating fortified vehicle,maize flour,Ethiopia,iron,regression,0.0
2,percent of population eating fortified vehicle,maize flour,Ethiopia,vitamin a,regression,0.0
3,percent of population eating fortified vehicle,maize flour,Ethiopia,zinc,regression,0.0
4,percent of population eating industrially prod...,maize flour,Ethiopia,na,regression,27.169813
5,percent of population eating vehicle,maize flour,Ethiopia,na,regression,59.259825
6,percent of population eating fortified vehicle,oil,Ethiopia,vitamin a,regression,0.0
7,percent of population eating vehicle,oil,Ethiopia,na,regression,29.35
8,percent of population eating fortified vehicle,wheat flour,Ethiopia,folic acid,regression,0.0
9,percent of population eating fortified vehicle,wheat flour,Ethiopia,iron,regression,0.0


In [90]:
all_data = all_data.append(need_reg)

In [91]:
all_data.value_mean = all_data.value_mean.astype(float)

In [92]:
fort = all_data[(all_data.value_description=="percent of population eating fortified vehicle")]
other = all_data[(all_data.value_description!="percent of population eating fortified vehicle")]

In [93]:
fort = pd.pivot_table(fort, index=['location_name','vehicle'],values = 'value_mean', columns = 'value_description')

In [94]:
other = pd.pivot_table(other, index=['location_name','vehicle'],values = 'value_mean', columns = 'value_description')

In [95]:
validate = fort.reset_index().merge(other.reset_index(), on = ['location_name','vehicle'], how = 'outer').set_index(['location_name','vehicle'])

In [96]:
validate.columns = [i.replace(" ","_") for i in validate.columns]

In [97]:
validate

Unnamed: 0_level_0,Unnamed: 1_level_0,percent_of_population_eating_fortified_vehicle,percent_of_population_eating_industrially_produced_vehicle,percent_of_population_eating_vehicle
location_name,vehicle,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ethiopia,maize flour,0.0,27.169813,59.259825
Ethiopia,oil,0.0,60.0,29.35
Ethiopia,wheat flour,0.0,23.5,28.0
India,oil,24.3,89.4,99.999
India,wheat flour,4.250111,7.1,83.2
Nigeria,maize flour,2.29711,8.543224,57.415462
Nigeria,oil,7.478678,31.896365,98.460661
Nigeria,wheat flour,17.452812,62.568603,62.759594


In [98]:
validate.loc[(validate.percent_of_population_eating_fortified_vehicle > validate.percent_of_population_eating_industrially_produced_vehicle) | (validate.percent_of_population_eating_industrially_produced_vehicle > validate.percent_of_population_eating_vehicle)]

Unnamed: 0_level_0,Unnamed: 1_level_0,percent_of_population_eating_fortified_vehicle,percent_of_population_eating_industrially_produced_vehicle,percent_of_population_eating_vehicle
location_name,vehicle,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ethiopia,oil,0.0,60.0,29.35


In [99]:
check = target.merge(all_data[rcols + ['value_mean']], on = rcols, how = 'left')

In [100]:
assert(len(check[check.value_mean.isna()])==0), "there are target loc/vehicle/val/nutrient combos youre missing"

In [101]:
rcols

['location_name', 'vehicle', 'value_description', 'nutrient']

In [102]:
output = all_data[rcols + ['value_mean','value_025_percentile', 'value_975_percentile','sub_population','estimation_status','source_citation','source_link','data_choice_notes']].sort_values(rcols).set_index(rcols)

In [103]:
##impute all missing CIs

# clean value_mean
output.loc[output.value_mean=='na','value_mean'] = np.nan
output.value_mean = output.value_mean.astype(float)

# clean 2.5th %ile
output.loc[output.value_025_percentile=='na','value_025_percentile'] = np.nan
output.value_025_percentile = output.value_025_percentile.astype(float)

# clean 97.5th %ile
output.loc[output.value_975_percentile=='na','value_975_percentile'] = np.nan
output.value_975_percentile = output.value_975_percentile.astype(float)

  result = method(y)


In [104]:
output.loc[(output.value_mean > output.value_975_percentile),'value_975_percentile'] = np.nan

In [105]:
output.loc[(output.value_mean < output.value_025_percentile)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes
location_name,vehicle,value_description,nutrient,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [106]:
output.loc[(output.value_mean == output.value_025_percentile)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes
location_name,vehicle,value_description,nutrient,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [107]:
output['scale_over_mean'] = (output.value_975_percentile - output.value_025_percentile) / output.value_mean

In [108]:
output = output.reset_index()

In [109]:
r = output.loc[(output.scale_over_mean!=np.inf),['vehicle','scale_over_mean']]
# .groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'}).reset_index()

In [110]:
r_mean = r.scale_over_mean.mean()

In [111]:
r.loc[r.vehicle=="maize flour",'scale_over_mean'] = r_mean

In [112]:
r = r.groupby('vehicle').mean().dropna().rename(columns={'scale_over_mean':'r'}).reset_index()

In [113]:
# add uncertainty
output = output.merge(r, on = 'vehicle', how = 'outer')

In [114]:
output.loc[(output.estimation_status=="regression"),'r'] = output.r * 2

In [115]:
output['lower'] = np.clip(output.value_mean - (output.r * output.value_mean)/2, 0, 100)
output['upper'] = np.clip(output.value_mean + (output.r * output.value_mean)/2, 0, 100)

In [116]:
output

Unnamed: 0,location_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes,scale_over_mean,r,lower,upper
0,Ethiopia,maize flour,percent of population eating fortified vehicle,folic acid,0.0,,,,regression,,,,,0.718554,0.0,0.0
1,Ethiopia,maize flour,percent of population eating fortified vehicle,iron,0.0,,,,regression,,,,,0.718554,0.0,0.0
2,Ethiopia,maize flour,percent of population eating fortified vehicle,vitamin a,0.0,,,,regression,,,,,0.718554,0.0,0.0
3,Ethiopia,maize flour,percent of population eating fortified vehicle,zinc,0.0,,,,regression,,,,,0.718554,0.0,0.0
4,Ethiopia,maize flour,percent of population eating industrially prod...,na,27.169813,,,,regression,,,,,0.718554,17.408327,36.931299
5,Ethiopia,maize flour,percent of population eating vehicle,na,59.259825,,,,regression,,,,,0.718554,37.96914,80.55051
6,Nigeria,maize flour,percent of population eating fortified vehicle,folic acid,1.245041,0.627025,1.963058,total population,,,,,1.073083,0.359277,1.021384,1.468699
7,Nigeria,maize flour,percent of population eating fortified vehicle,iron,5.453315,,,,regression,,,,,0.718554,3.494065,7.412565
8,Nigeria,maize flour,percent of population eating fortified vehicle,vitamin a,1.245041,0.627025,1.963058,total population,,,,,1.073083,0.359277,1.021384,1.468699
9,Nigeria,maize flour,percent of population eating fortified vehicle,zinc,1.245041,0.627025,1.963058,total population,,,,,1.073083,0.359277,1.021384,1.468699


In [117]:
output.loc[(output.value_mean < output.value_025_percentile) | (output.value_025_percentile.isna()),'CI_source'] = "modeling"
output.loc[(output.value_mean > output.value_975_percentile) | (output.value_975_percentile.isna()),'CI_source'] = "modeling"

output.loc[output.CI_source.isna(),'CI_source'] = 'extraction'

output.loc[(output.value_mean < output.value_025_percentile) | (output.value_025_percentile.isna()),'value_025_percentile'] = output.loc[(output.value_mean < output.value_025_percentile) | (output.value_025_percentile.isna())].lower
output.loc[(output.value_mean > output.value_975_percentile) | (output.value_975_percentile.isna()),'value_975_percentile'] = output.loc[(output.value_mean > output.value_975_percentile) | (output.value_975_percentile.isna())].upper

output = output.drop(columns=['r','lower','upper','scale_over_mean']).set_index(rcols)

output.loc[output.estimation_status.isna(),'estimation_status'] = 'na'

  raw_cell, store_history, silent, shell_futures)


In [118]:
output

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes,CI_source
location_name,vehicle,value_description,nutrient,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Ethiopia,maize flour,percent of population eating fortified vehicle,folic acid,0.0,0.0,0.0,,regression,,,,modeling
Ethiopia,maize flour,percent of population eating fortified vehicle,iron,0.0,0.0,0.0,,regression,,,,modeling
Ethiopia,maize flour,percent of population eating fortified vehicle,vitamin a,0.0,0.0,0.0,,regression,,,,modeling
Ethiopia,maize flour,percent of population eating fortified vehicle,zinc,0.0,0.0,0.0,,regression,,,,modeling
Ethiopia,maize flour,percent of population eating industrially produced vehicle,na,27.169813,17.408327,36.931299,,regression,,,,modeling
Ethiopia,maize flour,percent of population eating vehicle,na,59.259825,37.96914,80.55051,,regression,,,,modeling
Nigeria,maize flour,percent of population eating fortified vehicle,folic acid,1.245041,0.627025,1.963058,total population,na,,,,extraction
Nigeria,maize flour,percent of population eating fortified vehicle,iron,5.453315,3.494065,7.412565,,regression,,,,modeling
Nigeria,maize flour,percent of population eating fortified vehicle,vitamin a,1.245041,0.627025,1.963058,total population,na,,,,extraction
Nigeria,maize flour,percent of population eating fortified vehicle,zinc,1.245041,0.627025,1.963058,total population,na,,,,extraction


In [119]:
output = output.reset_index()

In [120]:
output.loc[(output.location_name=="Vietnam"),'location_name'] = "Viet Nam"

In [121]:
sort_helper = {
    'percent of population eating fortified vehicle': 'C',
 'percent of population eating industrially produced vehicle': 'B',
 'percent of population eating vehicle': 'A'}

In [122]:
output['sort_helper'] = output.value_description.map(sort_helper)

In [123]:
output = output.sort_values(['location_name','vehicle','sort_helper','value_description','nutrient']).drop(columns='sort_helper')

In [124]:
save_path_tmp = '/ihme/homes/beatrixh/repos/scratch/tier1_coverage_data_03_31_2021.csv'
output.to_csv(save_path_tmp, index = False)

In [126]:
output

Unnamed: 0,location_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile,sub_population,estimation_status,source_citation,source_link,data_choice_notes,CI_source
5,Ethiopia,maize flour,percent of population eating vehicle,na,59.259825,37.96914,80.55051,,regression,,,,modeling
4,Ethiopia,maize flour,percent of population eating industrially prod...,na,27.169813,17.408327,36.931299,,regression,,,,modeling
0,Ethiopia,maize flour,percent of population eating fortified vehicle,folic acid,0.0,0.0,0.0,,regression,,,,modeling
1,Ethiopia,maize flour,percent of population eating fortified vehicle,iron,0.0,0.0,0.0,,regression,,,,modeling
2,Ethiopia,maize flour,percent of population eating fortified vehicle,vitamin a,0.0,0.0,0.0,,regression,,,,modeling
3,Ethiopia,maize flour,percent of population eating fortified vehicle,zinc,0.0,0.0,0.0,,regression,,,,modeling
15,Ethiopia,oil,percent of population eating vehicle,na,29.35,24.377851,34.322149,,regression,,,,modeling
13,Ethiopia,oil,percent of population eating industrially prod...,na,70.0,64.070691,75.929309,under-5,na,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie...",http://www.journalejnfs.com/index.php/EJNFS/ar...,kept most recent year / representative populat...,modeling
14,Ethiopia,oil,percent of population eating industrially prod...,na,50.0,45.764779,54.235221,women of reproductive age,na,"Hafebo AS, Ndao PBL, Wuehler S, et al. Overvie...",http://www.journalejnfs.com/index.php/EJNFS/ar...,kept most recent year / representative populat...,modeling
12,Ethiopia,oil,percent of population eating fortified vehicle,vitamin a,0.0,0.0,0.0,,regression,,,,modeling


In [127]:
save_path = '/ihme/homes/beatrixh/vivarium_research_lsff/data_prep/outputs/tier1_coverage_data_03_31_2021.csv'
output.to_csv(save_path, index = False)