In [1]:
import pandas as pd

# Remote CSV with the menu data analysed in this notebook.
data = "https://s3.us-east-2.amazonaws.com/bites-data/menu.csv"

# Turn off pandas' chained-assignment warning for the whole session.
pd.set_option("mode.chained_assignment", None)

# Read the CSV a single time; the functions below use this module-level
# frame as their default argument.
df = pd.read_csv(data)


In [6]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(260, 24)

In [8]:
# Per-column dtypes, to see which columns are text and which are numeric.
df.dtypes

Category                          object
Item                              object
Serving Size                      object
Calories                           int64
Calories from Fat                  int64
Total Fat                        float64
Total Fat (% Daily Value)          int64
Saturated Fat                    float64
Saturated Fat (% Daily Value)      int64
Trans Fat                        float64
Cholesterol                        int64
Cholesterol (% Daily Value)        int64
Sodium                             int64
Sodium (% Daily Value)             int64
Carbohydrates                      int64
Carbohydrates (% Daily Value)      int64
Dietary Fiber                      int64
Dietary Fiber (% Daily Value)      int64
Sugars                             int64
Protein                            int64
Vitamin A (% Daily Value)          int64
Vitamin C (% Daily Value)          int64
Calcium (% Daily Value)            int64
Iron (% Daily Value)               int64
dtype: object

In [9]:
# Peek at the first rows to see what a single menu record looks like.
df.head()

Unnamed: 0,Category,Item,Serving Size,Calories,Calories from Fat,Total Fat,Total Fat (% Daily Value),Saturated Fat,Saturated Fat (% Daily Value),Trans Fat,...,Carbohydrates,Carbohydrates (% Daily Value),Dietary Fiber,Dietary Fiber (% Daily Value),Sugars,Protein,Vitamin A (% Daily Value),Vitamin C (% Daily Value),Calcium (% Daily Value),Iron (% Daily Value)
0,Breakfast,Egg McMuffin,4.8 oz (136 g),300,120,13.0,20,5.0,25,0.0,...,31,10,4,17,3,17,10,0,25,15
1,Breakfast,Egg White Delight,4.8 oz (135 g),250,70,8.0,12,3.0,15,0.0,...,30,10,4,17,3,18,6,0,25,8
2,Breakfast,Sausage McMuffin,3.9 oz (111 g),370,200,23.0,35,8.0,42,0.0,...,29,10,4,17,2,14,8,0,25,10
3,Breakfast,Sausage McMuffin with Egg,5.7 oz (161 g),450,250,28.0,43,10.0,52,0.0,...,30,10,4,17,2,21,15,0,30,15
4,Breakfast,Sausage McMuffin with Egg Whites,5.7 oz (161 g),400,210,23.0,35,8.0,42,0.0,...,30,10,4,17,2,21,6,0,25,10


In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Calories,Calories from Fat,Total Fat,Total Fat (% Daily Value),Saturated Fat,Saturated Fat (% Daily Value),Trans Fat,Cholesterol,Cholesterol (% Daily Value),Sodium,...,Carbohydrates,Carbohydrates (% Daily Value),Dietary Fiber,Dietary Fiber (% Daily Value),Sugars,Protein,Vitamin A (% Daily Value),Vitamin C (% Daily Value),Calcium (% Daily Value),Iron (% Daily Value)
count,260.0,260.0,260.0,260.0,260.0,260.0,260.0,260.0,260.0,260.0,...,260.0,260.0,260.0,260.0,260.0,260.0,260.0,260.0,260.0,260.0
mean,368.269231,127.096154,14.165385,21.815385,6.007692,29.965385,0.203846,54.942308,18.392308,495.75,...,47.346154,15.780769,1.630769,6.530769,29.423077,13.338462,13.426923,8.534615,20.973077,7.734615
std,240.269886,127.875914,14.205998,21.885199,5.321873,26.639209,0.429133,87.269257,29.091653,577.026323,...,28.252232,9.419544,1.567717,6.307057,28.679797,11.426146,24.366381,26.345542,17.019953,8.723263
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,210.0,20.0,2.375,3.75,1.0,4.75,0.0,5.0,2.0,107.5,...,30.0,10.0,0.0,0.0,5.75,4.0,2.0,0.0,6.0,0.0
50%,340.0,100.0,11.0,17.0,5.0,24.0,0.0,35.0,11.0,190.0,...,44.0,15.0,1.0,5.0,17.5,12.0,8.0,0.0,20.0,4.0
75%,500.0,200.0,22.25,35.0,10.0,48.0,0.0,65.0,21.25,865.0,...,60.0,20.0,3.0,10.0,48.0,19.0,15.0,4.0,30.0,15.0
max,1880.0,1060.0,118.0,182.0,20.0,102.0,2.5,575.0,192.0,3600.0,...,141.0,47.0,7.0,28.0,128.0,87.0,170.0,240.0,70.0,40.0


In [21]:
def get_food_most_calories(df=df):
    """Return the "Item" string of the food with the highest "Calories" value.

    If several rows share the maximum, the first of them in row order is
    returned. An IndexError is raised when no row matches (for example, when
    the frame is empty).
    """
    calories = df['Calories']
    top_rows = df[calories == calories.max()]
    return top_rows['Item'].values[0]

In [22]:
# Try the helper on the loaded frame (passed explicitly here; it is also the default).
get_food_most_calories(df)

'Chicken McNuggets (40 piece)'

In [None]:
def get_bodybuilder_friendly_foods(df=df, excl_drinks=False):
    """Return the 5 foods with the best Protein/Calories ratio.

    Rows with 0 calories are dropped before the ratio is computed, since
    dividing by zero would give infinite or undefined ratios.

    Args:
        df: DataFrame with at least the "Item", "Category", "Protein" and
            "Calories" columns. It is not modified.
        excl_drinks: When True, rows whose "Category" is 'Coffee & Tea' or
            'Beverages' are excluded from the ranking.

    Returns:
        A list of up to 5 "Item" strings, best ratio first.
    """
    with_calories = df[df['Calories'] > 0]
    # Build the ratio column on a new frame via assign() rather than writing
    # into the filtered slice, which is a chained assignment in pandas.
    ranked = with_calories.assign(
        **{'Protein/Calories': with_calories['Protein'] / with_calories['Calories']}
    ).sort_values(by='Protein/Calories', ascending=False)
    if excl_drinks:
        ranked = ranked[~ranked['Category'].isin(['Coffee & Tea', 'Beverages'])]
    # tolist() so the result is a plain list, as documented, not a NumPy array.
    return ranked['Item'].head(5).tolist()
