# Team Ernst Engel Nutritional Content Notebook
In this notebook, we obtain the nutritional contents of the foods of interest for our project. We then examine the nutritional adequacy of the diets of the households in our analysis.

In [275]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, widgets
import fooddatacentral as fdc

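# `config` is a local file that exists only on Jordan's computer.
# It contains his USDA FoodData Central API key (config.API_KEY).
# If you do not have this file, comment out the import below (it will raise
# an ImportError) and supply your own API key instead.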
import config

## Getting Nutritional Contents

In [140]:
# Foods of interest, indexed by food name; the 'FDC' column holds each
# food's FoodData Central id.
foods_oi = (
    pd.read_csv('./data/foods_oi.csv')
    .set_index('Food')
)

# FoodData Central API key, read from the local config module.
apikey = config.API_KEY

# Nutrient names listed in the 'Nutrition' column of the dietary-minimums
# table; used later to filter the nutrient rows that are kept.
dr_categories = (
    pd.read_csv('./data/us_diet_minimums.csv')
    .loc[:, 'Nutrition']
    .values
)

In [189]:
def get_nutritional_content(foods_oi, apikey, dr_categories):
    '''
    Description
    --------------------------------------------------
    Gets the nutritional content for the foods of
    interest. A food whose lookup fails is skipped
    with a warning instead of aborting the whole run.

    Inputs
    --------------------------------------------------
    + foods_oi : pandas dataframe; foods of interest
        (index) with their FDC id (column 'FDC')
    + apikey : string; FoodData Central API key
    + dr_categories : array; USDA dietary requirement
        categories

    Outputs
    --------------------------------------------------
    + food_nutrients : pandas dataframe; contains the
        nutritional contents of all foods of interest
        for the dr_categories (rows are nutrients,
        columns are foods, missing values are 0)
    '''
    import warnings

    nutrients_by_food = {}
    for food in foods_oi.index:
        try:
            fdc_id = foods_oi.loc[food, 'FDC']
            nutrients_by_food[food] = fdc.nutrients(apikey, fdc_id).Quantity
        except Exception as err:
            # Best effort: keep going, but tell the user which food was
            # dropped and why. Catching Exception (not a bare except) lets
            # KeyboardInterrupt stop a long run of API calls.
            warnings.warn(
                f"Skipping '{food}': could not retrieve nutrients ({err!r})",
                stacklevel=2,
            )

    food_nutrients = pd.DataFrame(nutrients_by_food, dtype=float)

    # Keep only the nutrients that appear in dr_categories; a nutrient that
    # is not reported for a food counts as 0.
    is_required = food_nutrients.index.isin(dr_categories)
    return food_nutrients[is_required].fillna(0)

In [191]:
# Query FoodData Central for every food of interest and display the result.
food_nutrients = get_nutritional_content(
    foods_oi=foods_oi,
    apikey=apikey,
    dr_categories=dr_categories,
)
food_nutrients

Unnamed: 0,Wheat Flour,Wheat Cereal,Corn Meal,Rolled Oats,Evaporated Milk,Cabbage,Potatoes,Spinach,Sweet Potatoes,Navy Beans,Sugar,Beets,Milk (Whole),Liver (Beef)
"Calcium, Ca",0.0,19.0,0.0,52.0,267.0,40.0,0.0,99.0,35.0,49.0,1.0,16.0,123.0,5.0
"Carbohydrate, by difference",70.7,76.3,78.79,67.5,10.0,5.8,16.89,3.63,20.0,16.13,99.77,9.56,4.63,3.89
Energy,345.0,359.0,345.0,375.0,133.0,103.0,74.0,97.0,82.0,97.0,1627.0,180.0,61.0,564.0
"Fiber, total dietary",2.6,10.4,0.0,10.0,0.0,2.5,2.0,2.2,3.5,4.0,0.0,2.8,0.0,0.0
"Folate, DFE",0.0,220.0,0.0,0.0,0.0,43.0,0.0,194.0,0.0,0.0,0.0,109.0,0.0,290.0
"Iron, Fe",0.0,27.6,0.0,5.0,0.0,0.47,0.73,2.71,0.59,1.61,0.06,0.8,0.0,4.9
"Magnesium, Mg",0.0,30.0,0.0,0.0,0.0,12.0,0.0,79.0,0.0,0.0,0.0,23.0,12.0,18.0
Niacin,0.0,7.25,5.455,0.0,0.0,0.234,0.0,0.724,0.0,0.0,0.0,0.334,0.105,13.175
"Phosphorus, P",0.0,112.0,0.0,0.0,0.0,26.0,0.0,49.0,0.0,0.0,0.0,40.0,101.0,387.0
"Potassium, K",0.0,126.0,167.0,352.0,300.0,170.0,439.0,558.0,341.0,323.0,2.0,325.0,150.0,313.0


## Examining Nutritional Adequacy

In [268]:
# Uganda inputs. Every missing value in the price and expenditure tables is
# replaced with 0.
food_nutrients = (
    pd.read_csv('./data/uganda_nutritional_contents.csv')
    .set_index('n')
)
food_prices = pd.read_csv('./data/uganda_food_prices.csv').fillna(0)
expenditures = pd.read_csv('./data/uganda_expenditures_19-20.csv').fillna(0)

# Households from an unknown region get the region label 'Unknown'.
hh_chars = (
    pd.read_csv('./data/uganda_hh_chars.csv')
    .assign(m = lambda frame: frame['m'].fillna('Unknown'))
)

In [452]:
# Survey round analysed below.
year_range = '2019-20'

# Household characteristics for that round.
hhc_sub = hh_chars.loc[hh_chars['t'] == year_range].reset_index(drop = True)

# Prices for that round, indexed by region ('m').
fp_sub = (
    food_prices
    .loc[food_prices['t'] == year_range]
    .drop(columns = ['t'])
    .set_index('m')
)

# Every expenditure column from the fourth one onward is treated as a food.
food_cols = expenditures.columns[3:]

# Mean price of each food across regions, as a one-column frame.
# NOTE(review): food_prices was loaded with fillna(0), so a missing regional
# price enters this mean as 0 - confirm that is intended.
fp_sub_avgs = fp_sub.mean().to_frame('Mean_Price')

In [438]:
def get_col_counts(expenditures_df, food_col, prices, price_avs):
    '''
    Description
    --------------------------------------------------
    Converts the expenditure on one food into a
    quantity for every row, by dividing the
    expenditure by the price of that food in the
    row's region.

    Inputs
    --------------------------------------------------
    + expenditures_df : pandas dataframe; has a region
        column 'm' and an expenditure column food_col
    + food_col : string; name of the food column
    + prices : pandas dataframe; indexed by region,
        with one price column per food
    + price_avs : pandas dataframe; indexed by food,
        whose first column is the mean price across
        regions

    Outputs
    --------------------------------------------------
    + counts : list; one entry per row of
        expenditures_df, expenditure / price, or 0
        when the price is 0
    '''
    counts = []
    for region, expenditure in zip(expenditures_df['m'], expenditures_df[food_col]):
        if region == 'Unknown':
            # Impute the price for an unknown region with the mean across
            # all regions. Read it from the price_avs argument (not a
            # notebook-level global) so the function is self-contained.
            price = price_avs.loc[food_col].iloc[0]
        else:
            price = prices.loc[region, food_col]

        # A price of 0 marks a missing price; avoid dividing by zero.
        counts.append(expenditure / price if price != 0 else 0)
    return counts

In [440]:
def get_counts(expenditures_df, food_cols, prices, price_avs):
    '''
    Description
    --------------------------------------------------
    Replaces every food expenditure column with the
    values returned by get_col_counts for that food.

    Inputs
    --------------------------------------------------
    + expenditures_df : pandas dataframe; expenditures
        with a region column 'm' and the food columns
    + food_cols : iterable; names of the food columns
    + prices : pandas dataframe; passed through to
        get_col_counts
    + price_avs : pandas dataframe; passed through to
        get_col_counts

    Outputs
    --------------------------------------------------
    + count_df : pandas dataframe; the non-food
        columns of expenditures_df followed by one
        column per food holding its counts
    '''
    # Start from the non-food columns, then append the foods one at a time.
    result = expenditures_df.drop(columns = food_cols)
    for food in food_cols:
        result[food] = get_col_counts(expenditures_df, food, prices, price_avs)
    return result

In [448]:
# Turn expenditures into counts using the 2019-20 regional prices.
# `exp_counts` is required by the merge in the next cell, so this call must
# run for the notebook to work on a fresh kernel.
exp_counts = get_counts(expenditures, food_cols, fp_sub, fp_sub_avgs)

In [450]:
# Join household characteristics with the counts on the three shared key
# columns 'i', 't' and 'm' (default inner join), then display the result.
household_master = hhc_sub.merge(exp_counts, on = ['i', 't', 'm'])
household_master

Unnamed: 0,i,t,m,F 00-03,F 04-08,F 09-13,F 14-18,F 19-30,F 31-50,F 51+,...,Sugarcane,Sweet Bananas,Sweet Potatoes,Tea,Tomatoes,Waragi,Water,Wheat (flour),Yam,Yogurt
0,00c9353d8ebe42faabf5919b81d7fae7,2019-20,Eastern,1.0,0.0,0.0,3.0,1.0,0.0,1.0,...,0.0,0.000000,5.034965,0.020000,1.600000,0.0,0.0,0.0,0.865775,0.0
1,062da72d5d3a457e9336b62c8bb9096d,2019-20,Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0
2,0d0e29faff394154a69562b4527b48b8,2019-20,Eastern,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.000000,0.000000,0.000000,1.280000,0.0,0.0,0.0,0.000000,0.0
3,0e03e253c35d4333a1ffad2df9d38850,2019-20,Eastern,1.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.000000,6.293706,0.020000,0.800000,0.0,0.0,0.0,0.000000,0.0
4,1013000201,2019-20,Central,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.709975,0.000000,0.030769,3.126136,0.0,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3002,bfdf0d66403440ceab439b1e1c47cdea,2019-20,Eastern,0.0,1.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.000000,7.552448,0.010000,1.600000,0.0,0.0,0.0,0.288592,0.0
3003,c33f6cb57d9849949e08a7350dabb829,2019-20,Central,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.000000,1.290323,0.015385,1.488636,0.0,0.0,0.0,0.000000,0.0
3004,d10a687889de469687377204195f3db0,2019-20,Western,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.000000,2.720000,0.000000,1.333333,0.0,0.0,0.0,0.000000,0.0
3005,d24fa50d02c041969a42102d8ebdadc9,2019-20,Eastern,0.0,1.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.000000,37.762238,0.000000,1.920000,0.0,0.0,0.0,0.000000,0.0
