# Food Dude
### Jaiden Gerig, Oron Hazi, Justin Katz, Kyle Wilson

## Importing Our Data
Luckily this is all in a CSV file, so we can grab it easily with Pandas.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Load the nutrient table; index_col=0 uses the first CSV column as the index.
foods = pd.read_csv('ABBREV.csv', index_col=0)
# Preview the first rows as the cell's output.
foods.head()

## Washing Our Food
To start off, we only wanted to look at the main nutritional values (protein, fat, sodium, etc.) of each food.

In [None]:
# Rename some of our columns to something a bit easier on the eyes.
# index=str additionally passes every index label through str().
foods = foods.rename(index=str, columns={
    'Shrt_Desc': 'Name',
    'Protein_(g)': 'Protein (g)',
    'Lipid_Tot_(g)': 'Total Fat(g)',
    'Cholestrl_(mg)': 'Cholesterol (mg)',
    'FA_Sat_(g)': 'Saturated Fat (g)',
    'Sodium_(mg)': 'Sodium (mg)',
    'Potassium_(mg)': 'Potassium (mg)',
    'Carbohydrt_(g)': 'Carbohydrates (g)',
    'Fiber_TD_(g)': 'Fiber (g)',
})
# Every food is given the same reference weight of 100.
# NOTE(review): presumably the table lists its values per 100 g -- confirm
# against the data source.
foods['Weight (g)'] = 100
# Look at a specific subset of nutrients
nutrients = ['Name', 'Protein (g)', 'Total Fat(g)', 'Cholesterol (mg)',
             'Saturated Fat (g)', 'Sodium (mg)', 'Potassium (mg)',
             'Carbohydrates (g)', 'Fiber (g)', 'Weight (g)']
foods = foods[nutrients]
# Missing measurements are treated as 0.
foods = foods.fillna(0)
# Get rid of foods without a positive weight. A boolean mask selects the same
# rows as a per-row lambda, but in one vectorised comparison.
foods = foods[foods['Weight (g)'] > 0]
foods.head()

We wanted to look at the nutritional value of each food regardless of serving size, so we normalized them to 1 gram.

In [None]:
# Normalizing all our foods to 1 g
def normalizeNutrients(x, columns=None):
    """Return a copy of one food row with its numeric values scaled to 1 g.

    x: one row of ``foods`` -- any mapping with a ``copy()`` method that
       holds a non-zero 'Weight (g)' entry.
    columns: names of the entries to rescale; defaults to the notebook-level
       ``nutrients`` list.

    Every non-string entry named in ``columns`` is divided by the row's
    original 'Weight (g)'; string entries (the food name) are left as-is.
    """
    if columns is None:
        columns = nutrients
    # Work on a copy: pandas does not support functions that mutate the row
    # object handed to them by DataFrame.apply.
    x = x.copy()
    # float() guarantees true division even for integer values on Python 2.
    ratio = float(x['Weight (g)'])
    for nutrient in columns:
        if isinstance(x[nutrient], str):
            continue
        x[nutrient] = x[nutrient] / ratio
    return x
# Rescale every row to a 1 g portion.
foods = foods.apply(normalizeNutrients, axis=1)
# Column averages over all foods; the weight entry is dropped from the result.
avg_nutrients = foods.mean().drop("Weight (g)")
foods.head()

## Making a Meal
We started our food analysis by taking all the foods and scaling them up to meet the demands of an average 2000 calorie diet according to [Netrition](http://www.netrition.com/rdi_page.html).

The first step we took was to exclude foods that didn't contain all the nutrients we were looking at.

In [None]:
# We only want foods that have a chance to sustain our needs
def filterNutrients(x, columns=None):
    """Return True when every numeric entry of the row is strictly positive.

    x: one row of ``foods`` (a mapping from column name to value).
    columns: names of the entries to check; defaults to the notebook-level
       ``nutrients`` list.

    String entries (the food name) are ignored.
    """
    if columns is None:
        columns = nutrients
    for nutrient in columns:
        value = x[nutrient]
        # Text cannot be ordered against a number on Python 3 (TypeError);
        # skipping it matches the Python 2 outcome, where the name never
        # failed the check.
        if isinstance(value, str):
            continue
        if value <= 0:
            return False
    return True
# Keep only the rows for which filterNutrients returns True.
foods = foods[foods.apply(filterNutrients, axis=1)]
# A single-argument print(...) gives the same output under the Python 2
# print statement and the Python 3 print function.
print('Matching Foods: %d' % len(foods))
foods.head()

Nice, that leaves us with almost 1400 foods to look at, not too shabby.
So let's take these foods and scale them up to see how many grams of each we would have to consume to fulfill our daily nutrition requirements.

In [None]:
# http://www.netrition.com/rdi_page.html
# Target amounts, matched by position with the `nutrients` list. -1 marks an
# entry without a target; the functions that read this list skip those.
recommended = [-1,50,65,300,20,2400,3500,300,25,-1]
def findSatisfyingWeight(food, columns=None, targets=None):
    """Return a copy of a food row scaled up until it meets every target.

    food: one row of ``foods`` -- any mapping with a ``copy()`` method.
    columns: entry names to consider; defaults to the notebook-level
       ``nutrients`` list.
    targets: target amount for each name in ``columns`` (same order), with
       -1 meaning "no target"; defaults to the notebook-level
       ``recommended`` list.

    Whenever an entry is below its target, all non-string entries named in
    ``columns`` (including the weight, when listed) are multiplied by the
    factor that lifts that entry exactly to the target. The factor is always
    greater than 1, so entries already satisfied stay satisfied.
    """
    if columns is None:
        columns = nutrients
    if targets is None:
        targets = recommended
    # Work on a copy: pandas does not support functions that mutate the row
    # object handed to them by DataFrame.apply.
    food = food.copy()
    for nutrient, rec in zip(columns, targets):
        # The -1 test comes first so string entries are never compared.
        if rec == -1 or food[nutrient] >= rec:
            continue
        # float() guarantees true division even for integers on Python 2.
        ratio = float(rec) / food[nutrient]
        for y in columns:
            if isinstance(food[y], str):
                continue
            food[y] = food[y] * ratio
    return food
# Scale each food up to the portion that meets all of the targets.
weighted_foods = foods.apply(findSatisfyingWeight, axis=1)
# Column order for the tables below: name and weight first, then nutrients.
display = [
    'Name', 'Weight (g)',
    'Protein (g)', 'Total Fat(g)', 'Cholesterol (mg)', 'Saturated Fat (g)',
    'Sodium (mg)', 'Potassium (mg)', 'Carbohydrates (g)', 'Fiber (g)',
]
# Show the ten foods with the smallest required weight.
weighted_foods[display].sort_values('Weight (g)').head(10)

Yikes, none of that seems very healthy at all

To put some of these foods into perspective:

* Potato Pancakes: 1078 grams = 49 pancakes
* Chicken Noodle Soup: 1143 grams = 15 packets
* Digiorno Thin Crust Pizza: 1305 grams = 2.4 Pizza

Let's see what happens when we look at how far over the recommended nutritional values these foods go.

In [None]:
from bokeh.io import push_notebook,show,output_notebook
from bokeh.layouts import row
from bokeh.plotting import figure
from bokeh.charts import Bar, output_file, show
from bokeh.models import Range1d
from bokeh.charts.operations import blend
from bokeh import palettes
# Configure Bokeh (bokeh.io) for notebook output before any chart is shown.
output_notebook()
def findOverages(food, columns=None, targets=None):
    """Return a copy of a food row with each target subtracted from its entry.

    food: one row of ``weighted_foods`` -- any mapping with a ``copy()``
       method.
    columns: entry names to consider; defaults to the notebook-level
       ``nutrients`` list.
    targets: target amount for each name in ``columns`` (same order), with
       -1 meaning "no target, leave the entry untouched"; defaults to the
       notebook-level ``recommended`` list.
    """
    if columns is None:
        columns = nutrients
    if targets is None:
        targets = recommended
    # Work on a copy: pandas does not support functions that mutate the row
    # object handed to them by DataFrame.apply.
    food = food.copy()
    for nutrient, rec in zip(columns, targets):
        if rec == -1:
            continue
        food[nutrient] = food[nutrient] - rec
    return food
overage_foods = weighted_foods.apply(findOverages, axis=1)
df = overage_foods.sort_values(by='Weight (g)').head(10)
# One (left, right) pair per row of charts; each entry is
# (value column, chart title, extra Bar keyword arguments).
chart_rows = [
    (('Protein (g)', "Excess Protein", {'continuous_range': Range1d(0, 100)}),
     ('Total Fat(g)', "Excess Total Fat", {})),
    (('Cholesterol (mg)', "Excess Cholesterol", {}),
     ('Saturated Fat (g)', "Excess Saturated Fats", {})),
    (('Sodium (mg)', "Excess Sodium", {}),
     ('Potassium (mg)', "Excess Potassium", {})),
    (('Carbohydrates (g)', "Excess Carbohydrates", {}),
     ('Fiber (g)', "Excess Fiber", {})),
]
for (left_col, left_title, left_extra), (right_col, right_title, right_extra) in chart_rows:
    a = Bar(df, 'Name', values=left_col, title=left_title,
            legend=False, width=450, **left_extra)
    b = Bar(df, 'Name', values=right_col, title=right_title,
            legend=False, width=450, **right_extra)
    # Blank out the x-axis label on both charts.
    a.xaxis.axis_label = ""
    b.xaxis.axis_label = ""
    show(row(a, b))

So that gives us a general idea of which kinds of foods have common excesses, but it's hard to compare them to each other, so let's convert them to percentages over 100% of the 2000 calorie diet's recommended amounts.

In [None]:
def findPercentOverages(food, columns=None, targets=None):
    """Return a copy of a food row with entries as percent above target.

    food: one row of ``weighted_foods`` -- any mapping with a ``copy()``
       method.
    columns: entry names to consider; defaults to the notebook-level
       ``nutrients`` list.
    targets: target amount for each name in ``columns`` (same order), with
       -1 meaning "no target, leave the entry untouched"; defaults to the
       notebook-level ``recommended`` list.

    An entry exactly on target becomes 0, one at twice the target becomes 100.
    """
    if columns is None:
        columns = nutrients
    if targets is None:
        targets = recommended
    # Work on a copy: pandas does not support functions that mutate the row
    # object handed to them by DataFrame.apply.
    food = food.copy()
    for nutrient, rec in zip(columns, targets):
        if rec == -1:
            continue
        # float() guarantees true division even for integers on Python 2.
        food[nutrient] = (food[nutrient] / float(rec)) * 100 - 100
    return food
overage_foods = weighted_foods.apply(findPercentOverages, axis=1)
# The nutrient columns now hold percentages, so relabel every one of them --
# including fiber, which previously kept its '(g)' label.
overage_foods[display].sort_values(by='Weight (g)').head(10).rename(index=str, columns={
    'Protein (g)': 'Protein (%)',
    'Total Fat(g)': 'Total Fat(%)',
    'Cholesterol (mg)': 'Cholesterol (%)',
    'Saturated Fat (g)': 'Saturated Fat (%)',
    'Sodium (mg)': 'Sodium (%)',
    'Potassium (mg)': 'Potassium (%)',
    'Carbohydrates (g)': 'Carbohydrates (%)',
    'Fiber (g)': 'Fiber (%)',
})

And now let's chart them again with a uniform scale


In [None]:
# Relabel the nutrient columns as percentages. 'Fiber (g)' has to become
# 'Fiber (%)' too, because blend() below selects the column under that name.
df = overage_foods.sort_values(by='Weight (g)').head(10).rename(index=str, columns={
    'Protein (g)': 'Protein (%)',
    'Total Fat(g)': 'Total Fat(%)',
    'Cholesterol (mg)': 'Cholesterol (%)',
    'Saturated Fat (g)': 'Saturated Fat (%)',
    'Sodium (mg)': 'Sodium (%)',
    'Potassium (mg)': 'Potassium (%)',
    'Carbohydrates (g)': 'Carbohydrates (%)',
    'Fiber (g)': 'Fiber (%)',
})
# One bar group per nutrient column, one bar per food within each group.
a = Bar(df, label='vars', group='Name',
        values=blend('Protein (%)', 'Total Fat(%)', 'Cholesterol (%)',
                     'Saturated Fat (%)', 'Sodium (%)', 'Potassium (%)',
                     'Carbohydrates (%)', 'Fiber (%)', name='values', labels_name='vars'),
        title="Excess Nutrients (% above recommended daily intake)", width=900)
a.xaxis.axis_label = ""
a.yaxis.axis_label = "% above reccomended daily intake"
show(a)
output_notebook()

Holy Guacamole! Look at that sodium!

It looks like potassium and sodium are crazy high compared to the other nutrients, which is most likely because they're the only nutrients measured in milligrams instead of grams, so let's see what the graph looks like without them so we can get a better understanding of the other nutrients

In [None]:
# Relabel the nutrient columns as percentages. 'Fiber (g)' has to become
# 'Fiber (%)' too, because blend() below selects the column under that name.
df = overage_foods.sort_values(by='Weight (g)').head(10).rename(index=str, columns={
    'Protein (g)': 'Protein (%)',
    'Total Fat(g)': 'Total Fat(%)',
    'Cholesterol (mg)': 'Cholesterol (%)',
    'Saturated Fat (g)': 'Saturated Fat (%)',
    'Sodium (mg)': 'Sodium (%)',
    'Potassium (mg)': 'Potassium (%)',
    'Carbohydrates (g)': 'Carbohydrates (%)',
    'Fiber (g)': 'Fiber (%)',
})
# Same chart as before, with the sodium and potassium columns left out.
a = Bar(df, label='vars', group='Name',
        values=blend('Protein (%)', 'Total Fat(%)', 'Cholesterol (%)',
                     'Saturated Fat (%)',
                     'Carbohydrates (%)', 'Fiber (%)', name='values', labels_name='vars'),
        title="Excess Nutrients (% above recommended daily intake) (Excluding Sodium & Potassium)",
        width=900, height=1000, palette=palettes.BrBG11)
a.xaxis.axis_label = ""
a.yaxis.axis_label = "% above reccomended daily intake"
show(a)
output_notebook()

So overall, it appears that these foods are providing mainly carbs and sodium at the expense of other nutrients 

## What about nutrients per calorie?

As most people know, the average optimal number of calories that someone should eat in one day is 2000. The work we have done thus far is concerned with the amount of nutrients per gram, but grams are not necessarily the best indication of the nutritional value of your food. Because of this, we want to do a similar analysis, but optimizing the nutrients per calorie rather than the nutrients per gram.

In [None]:
# Here we do some preliminary data preparation, mostly copying what was done before.

calfoods = pd.read_csv('ABBREV.csv', index_col=0)

# Rename some of our columns to something a bit easier on the eyes.
# index=str additionally passes every index label through str().
calfoods = calfoods.rename(index=str, columns={
    'Shrt_Desc': 'Name',
    'Protein_(g)': 'Protein (g)',
    'Lipid_Tot_(g)': 'Total Fat(g)',
    'Cholestrl_(mg)': 'Cholesterol (mg)',
    'FA_Sat_(g)': 'Saturated Fat (g)',
    'Sodium_(mg)': 'Sodium (mg)',
    'Potassium_(mg)': 'Potassium (mg)',
    'Carbohydrt_(g)': 'Carbohydrates (g)',
    'Fiber_TD_(g)': 'Fiber (g)',
    'Energ_Kcal': 'Calories',
})
# Look at a specific subset of nutrients
calnutrients = ['Name', 'Protein (g)', 'Total Fat(g)', 'Cholesterol (mg)',
                'Saturated Fat (g)', 'Sodium (mg)', 'Potassium (mg)',
                'Carbohydrates (g)', 'Fiber (g)', 'Calories']
calfoods = calfoods[calnutrients]
# Missing measurements are treated as 0.
calfoods = calfoods.fillna(0)
# Get rid of foods without calories. A boolean mask selects the same rows as
# a per-row lambda, but in one vectorised comparison.
calfoods = calfoods[calfoods['Calories'] > 0]

# TODO: convert sodium and potassium to grams (no conversion is done here).


# Normalizing all our foods to 1 calorie
def normalizeNutrientsCal(x, columns=None):
    """Return a copy of one food row with its numeric values scaled to 1 calorie.

    x: one row of ``calfoods`` -- any mapping with a ``copy()`` method that
       holds a non-zero 'Calories' entry.
    columns: names of the entries to rescale; defaults to the notebook-level
       ``calnutrients`` list.

    Every non-string entry named in ``columns`` is divided by the row's
    original 'Calories'; string entries (the food name) are left as-is.
    """
    if columns is None:
        columns = calnutrients
    # Work on a copy: pandas does not support functions that mutate the row
    # object handed to them by DataFrame.apply.
    x = x.copy()
    # float() guarantees true division even for integer values on Python 2.
    ratio = float(x['Calories'])
    for nutrient in columns:
        if isinstance(x[nutrient], str):
            continue
        x[nutrient] = x[nutrient] / ratio
    return x
# Apply the per-calorie scaling to every row.
calfoods = calfoods.apply(normalizeNutrientsCal,axis=1)
# NOTE(review): more statements follow in this cell, so this preview is
# presumably not displayed -- confirm.
calfoods.head()

# We only want foods that have a chance to sustain our needs
def filterNutrientsCal(x, columns=None):
    """Return True when every numeric entry of the row is strictly positive.

    x: one row of ``calfoods`` (a mapping from column name to value).
    columns: names of the entries to check; defaults to the notebook-level
       ``calnutrients`` list.

    String entries (the food name) are ignored.
    """
    if columns is None:
        columns = calnutrients
    for nutrient in columns:
        value = x[nutrient]
        # Text cannot be ordered against a number on Python 3 (TypeError);
        # skipping it matches the Python 2 outcome, where the name never
        # failed the check.
        if isinstance(value, str):
            continue
        if value <= 0:
            return False
    return True
# Keep only the rows for which filterNutrientsCal returns True.
calfoods = calfoods[calfoods.apply(filterNutrientsCal, axis=1)]
# Preview the first five remaining rows.
calfoods.head(5)

As we can see, we now have all of the foods with their nutrients weighted per calorie. However, this does not tell us a whole lot. Let's find out how many calories we would need to eat of each food to reach our nutritional goal.

In [None]:
# http://www.netrition.com/rdi_page.html
# Target amounts, matched by position with the `calnutrients` list. -1 marks
# an entry without a target; the functions that read this list skip those.
recommended = [-1,50,65,300,20,2400,3500,300,25,-1]
def findSatisfyingWeightCal(food, columns=None, targets=None):
    """Return a copy of a food row scaled up until it meets every target.

    food: one row of ``calfoods`` -- any mapping with a ``copy()`` method.
    columns: entry names to consider; defaults to the notebook-level
       ``calnutrients`` list.
    targets: target amount for each name in ``columns`` (same order), with
       -1 meaning "no target"; defaults to the notebook-level
       ``recommended`` list.

    Whenever an entry is below its target, all non-string entries named in
    ``columns`` (including the calories, when listed) are multiplied by the
    factor that lifts that entry exactly to the target. The factor is always
    greater than 1, so entries already satisfied stay satisfied.
    """
    if columns is None:
        columns = calnutrients
    if targets is None:
        targets = recommended
    # Work on a copy: pandas does not support functions that mutate the row
    # object handed to them by DataFrame.apply.
    food = food.copy()
    for nutrient, rec in zip(columns, targets):
        # The -1 test comes first so string entries are never compared.
        if rec == -1 or food[nutrient] >= rec:
            continue
        # float() guarantees true division even for integers on Python 2.
        ratio = float(rec) / food[nutrient]
        for y in columns:
            if isinstance(food[y], str):
                continue
            food[y] = food[y] * ratio
    return food
# Scale each food up to the calorie amount that meets all of the targets.
calweighted_foods = calfoods.apply(findSatisfyingWeightCal, axis=1)
# Column order for the tables below: name and calories first, then nutrients.
display = [
    'Name', 'Calories',
    'Protein (g)', 'Total Fat(g)', 'Cholesterol (mg)', 'Saturated Fat (g)',
    'Sodium (mg)', 'Potassium (mg)', 'Carbohydrates (g)', 'Fiber (g)',
]
# Show the ten foods with the smallest required calorie count.
calweighted_foods[display].sort_values('Calories').head(10)

Like before, we go far over our goals for each nutrient. We also go over our preferred caloric intake. Additionally, we get a very different set of foods than when we looked at nutrients per gram. Let's now look and see how far over each of these we are going.

In [None]:
from bokeh.io import push_notebook,show,output_notebook
from bokeh.layouts import row
from bokeh.plotting import figure
from bokeh.charts import Bar, output_file, show
from bokeh.models import Range1d
from bokeh.charts.operations import blend
from bokeh import palettes
def findOveragesCal(food, columns=None, targets=None):
    """Return a copy of a food row with each target subtracted from its entry.

    food: one row of ``calweighted_foods`` -- any mapping with a ``copy()``
       method.
    columns: entry names to consider; defaults to the notebook-level
       ``calnutrients`` list.
    targets: target amount for each name in ``columns`` (same order), with
       -1 meaning "no target, leave the entry untouched"; defaults to the
       notebook-level ``recommended`` list.
    """
    if columns is None:
        columns = calnutrients
    if targets is None:
        targets = recommended
    # Work on a copy: pandas does not support functions that mutate the row
    # object handed to them by DataFrame.apply.
    food = food.copy()
    for nutrient, rec in zip(columns, targets):
        if rec == -1:
            continue
        food[nutrient] = food[nutrient] - rec
    return food
caloverage_foods = calweighted_foods.apply(findOveragesCal, axis=1)
df = caloverage_foods.sort_values(by='Calories').head(10)
# One (left, right) pair per row of charts; each entry is
# (value column, chart title, extra Bar keyword arguments).
chart_rows = [
    (('Protein (g)', "Excess Protein", {'continuous_range': Range1d(0, 100)}),
     ('Total Fat(g)', "Excess Total Fat", {})),
    (('Cholesterol (mg)', "Excess Cholesterol", {}),
     ('Saturated Fat (g)', "Excess Saturated Fats", {})),
    (('Sodium (mg)', "Excess Sodium", {}),
     ('Potassium (mg)', "Excess Potassium", {})),
    (('Carbohydrates (g)', "Excess Carbohydrates", {}),
     ('Fiber (g)', "Excess Fiber", {})),
]
for (left_col, left_title, left_extra), (right_col, right_title, right_extra) in chart_rows:
    a = Bar(df, 'Name', values=left_col, title=left_title,
            legend=False, width=450, **left_extra)
    b = Bar(df, 'Name', values=right_col, title=right_title,
            legend=False, width=450, **right_extra)
    # Blank out the x-axis label on both charts.
    a.xaxis.axis_label = ""
    b.xaxis.axis_label = ""
    show(row(a, b))
output_notebook()

Like before, let's look at these as a percentage of the 2000 calorie diet.

In [None]:
def findPercentOveragesCal(food, columns=None, targets=None):
    """Return a copy of a food row with entries as percent above target.

    food: one row of ``calweighted_foods`` -- any mapping with a ``copy()``
       method.
    columns: entry names to consider; defaults to the notebook-level
       ``calnutrients`` list.
    targets: target amount for each name in ``columns`` (same order), with
       -1 meaning "no target, leave the entry untouched"; defaults to the
       notebook-level ``recommended`` list.

    An entry exactly on target becomes 0, one at twice the target becomes 100.
    """
    if columns is None:
        columns = calnutrients
    if targets is None:
        targets = recommended
    # Work on a copy: pandas does not support functions that mutate the row
    # object handed to them by DataFrame.apply.
    food = food.copy()
    for nutrient, rec in zip(columns, targets):
        if rec == -1:
            continue
        # float() guarantees true division even for integers on Python 2.
        food[nutrient] = (food[nutrient] / float(rec)) * 100 - 100
    return food
caloverage_foods = calweighted_foods.apply(findPercentOveragesCal, axis=1)
# The nutrient columns now hold percentages, so relabel every one of them --
# including fiber, which previously kept its '(g)' label.
caloverage_foods[display].sort_values(by='Calories').head(10).rename(index=str, columns={
    'Protein (g)': 'Protein (%)',
    'Total Fat(g)': 'Total Fat(%)',
    'Cholesterol (mg)': 'Cholesterol (%)',
    'Saturated Fat (g)': 'Saturated Fat (%)',
    'Sodium (mg)': 'Sodium (%)',
    'Potassium (mg)': 'Potassium (%)',
    'Carbohydrates (g)': 'Carbohydrates (%)',
    'Fiber (g)': 'Fiber (%)',
})

Charting again with a uniform scale:

In [None]:
# Relabel the nutrient columns as percentages. 'Fiber (g)' has to become
# 'Fiber (%)' too, because blend() below selects the column under that name.
df = caloverage_foods.sort_values(by='Calories').head(10).rename(index=str, columns={
    'Protein (g)': 'Protein (%)',
    'Total Fat(g)': 'Total Fat(%)',
    'Cholesterol (mg)': 'Cholesterol (%)',
    'Saturated Fat (g)': 'Saturated Fat (%)',
    'Sodium (mg)': 'Sodium (%)',
    'Potassium (mg)': 'Potassium (%)',
    'Carbohydrates (g)': 'Carbohydrates (%)',
    'Fiber (g)': 'Fiber (%)',
})
# One bar group per nutrient column, one bar per food within each group.
a = Bar(df, label='vars', group='Name',
        values=blend('Protein (%)', 'Total Fat(%)', 'Cholesterol (%)',
                     'Saturated Fat (%)', 'Sodium (%)', 'Potassium (%)',
                     'Carbohydrates (%)', 'Fiber (%)', name='values', labels_name='vars'),
        title="Excess Nutrients (% above recommended daily intake)", width=900)
a.xaxis.axis_label = ""
a.yaxis.axis_label = "% above reccomended daily intake"
show(a)
output_notebook()

Again, our sodium and potassium are much higher than the other nutrients. Let's take them out and take another look.

In [None]:
# Relabel the nutrient columns as percentages. 'Fiber (g)' has to become
# 'Fiber (%)' too, because blend() below selects the column under that name.
df = caloverage_foods.sort_values(by='Calories').head(10).rename(index=str, columns={
    'Protein (g)': 'Protein (%)',
    'Total Fat(g)': 'Total Fat(%)',
    'Cholesterol (mg)': 'Cholesterol (%)',
    'Saturated Fat (g)': 'Saturated Fat (%)',
    'Sodium (mg)': 'Sodium (%)',
    'Potassium (mg)': 'Potassium (%)',
    'Carbohydrates (g)': 'Carbohydrates (%)',
    'Fiber (g)': 'Fiber (%)',
})
# Same chart as before, with the sodium and potassium columns left out.
a = Bar(df, label='vars', group='Name',
        values=blend('Protein (%)', 'Total Fat(%)', 'Cholesterol (%)',
                     'Saturated Fat (%)',
                     'Carbohydrates (%)', 'Fiber (%)', name='values', labels_name='vars'),
        title="Excess Nutrients (% above recommended daily intake) (Excluding Sodium & Potassium)",
        width=900, height=1000, palette=palettes.BrBG11)
a.xaxis.axis_label = ""
a.yaxis.axis_label = "% above reccomended daily intake"
show(a)
output_notebook()

While these percentages aren't nearly as high as when looking at the overages for weight, we can still see a lot of overages for per calorie nutrients. However, it seems now that instead of sacrificing other nutrients for carbs, it is sacrificing them for cholesterol.

## What's in your cart?

Let's look at the average of food nutrients found in this dataset, and see if there are any weird things happening...
THINGS TO ADD TO THIS - Maybe show recommended nutrient intake stacked against this

In [None]:
# A single-argument print(...) gives the same output under the Python 2
# print statement and the Python 3 print function.
print(avg_nutrients)
# Divide the milligram-based averages by 1000 to express them in grams.
# NOTE: this updates avg_nutrients in place, so running the cell again divides
# the values a second time, and the labels still read '(mg)' afterwards.
avg_nutrients["Sodium (mg)"] /= 1000
avg_nutrients["Cholesterol (mg)"] /= 1000
avg_nutrients["Potassium (mg)"] /= 1000
p = Bar(avg_nutrients)
show(p)