In [2]:
import requests
import pandas as pd

In [3]:
def find_periods(date):
    '''This function takes a date as input and returns the periods associated 
    with the date from the Dine On Campus API. We need to do this because the
    API changes the period id frequently. Periods represent the time of day
    (breakfast, lunch, dinner, etc.).
    
    Args:
        date (str): The date to search for in the format YYYY-M-DD
        
    Returns:
        periods_dict (dict): The dict of periods associated with the date; 
        empty if no periods found for date
    '''

    # create an empty dictionary to store the periods
    periods_dict = dict()

    # call the API to get the json information.
    url = "https://api.dineoncampus.com/v1/location/586d05e4ee596f6e6c04b527/periods?platform=0&date=" + date
    req = requests.get(url)

    # format the response as json
    data = req.json()

    # get the periods from the json
    periods = data.get("periods")

    # for each of the periods found
    for period in periods:
        # add the name and id to the dictionary
        periods_dict[period.get("name")] = period.get("id")

    # return the final dictionary
    return periods_dict


In [4]:
def get_menu(date, meal):
    '''Returns a dataframe containing the menu for a given date
    and all of its nutritional information.
    
    Args:
        date (str): The date to search for in the format YYYY-M-DD

    Returns:
        DataFrame: A dataframe containing the menu for the given date;
        empty if no menu found for date provided
    '''
    periods = find_periods(date)

    final_df = pd.DataFrame(columns=['food'])
    final_df = final_df.set_index('food')

    for period in periods.keys():
        if (period != meal):
            continue
        
        url = "https://api.dineoncampus.com/v1/location/586d05e4ee596f6e6c04b527/periods/" + periods[period] + "?platform=0&date=" + date
        req = requests.get(url)
        # get req as json
        data = req.json()
        # get the catagories of food
        catagories = data.get('menu').get('periods').get('categories')

        pd_food = pd.DataFrame(columns=['food'])
        pd_food = pd_food.set_index('food')

        # for each catagory
        for catagory in catagories:
            items = catagory.get('items')
            # for each food in the catagory
            for food in items:
                name = food.get('name')
                nutrients = food.get('nutrients')
                portion = food.get('portion')
                for nutrient in nutrients:
                    nutrient_name = nutrient.get('name')
                    nutrient_value = nutrient.get('value_numeric')
                    pd_food.at[name, nutrient_name] = nutrient_value
                pd_food.at[name, 'Serving Size'] = portion
        
        final_df = pd.concat([final_df, pd_food], sort=True)

    return final_df
    

In [5]:
# this takes a while to run due to the slow API from Dine On Campus
food_data_breakfast = get_menu("2023-3-24", "Breakfast")
food_data_lunch = get_menu("2023-3-24", "Lunch")
food_data_dinner = get_menu("2023-3-24", "Dinner")

In [11]:
food_data_breakfast

Unnamed: 0_level_0,Calcium (mg),Calories,Calories From Fat,Cholesterol (mg),Dietary Fiber (g),Iron (mg),Potassium (mg),Protein (g),Saturated Fat (g),Saturated Fat + Trans Fat (g),Serving Size,Sodium (mg),Sugar (g),Total Carbohydrates (g),Total Fat (g),Trans Fat (g),Vitamin A (RE),Vitamin C (mg),Vitamin D (IU)
food,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
"Ham, Egg, Cheese Breakfast Pizza",190,330,120,90,1,2.6,250,18,6,5+,1 slice,760,3+,35,13,-,60+,2,50
Cheese Pizza,210,200,90,25,1,1.3,120,10,4.5,5+,1 slice,610,1,18,10,-,20+,3,15
Scrambled Eggs,60,200,140,420,0,2,160,14,4,5,1/2 cup,160,0,1,15,0,120,0,90
Chicken Kielbasa Sausage,100,120,90,55,0,-,-,7,2.5,0,1/2 each,510,0,0,10,0,-,-,-
Thyme Roasted Potatoes,20,110,30,0,2,1.1,470,2,0,0,1/2 cup,230,1,17,3.5,0,5+,11,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Lucky Charms,130,140,15,0,2,-,80,3,0,0,1 cup,230,12,30,1.5,0,80,9,90
Cinnamon Toast Crunch,170,170,35,0,3,6,70,1,0.5,0,1 cup,240,12,33,4,0,-,12,110
Total Raisin Bran,40,180,5,0,5,14.5,210,3,0,0,1 cup,220,18,45,1,0,-,7,60
"Peanut Butter, .75 oz, Jif",10,130,100,0,1,0.4,120,5,2,0,1 each,90,2,5,11,0,-,0,0


In [6]:
def format_food_data(food_data):
    '''Formats the food data to be more readable by the model.
    
    Args:
        food_data (DataFrame): The dataframe to format'''
    
    # define columns to keep
    to_keep = ['Calories', 'Total Fat (g)', 'Saturated Fat (g)', 'Total Carbohydrates (g)', 'Sugar (g)', 'Protein (g)']

    # drop all columns not in to_keep
    food_data = food_data[to_keep]

    # drop all rows with NaN values
    food_data = food_data.dropna()

    food_data.columns = ['Energy (kcal)', 'Fat (g)', 'Saturated fat (g)', 'Carbohydrates (g)', 'Sugars (g)', 'Proteins (g)']

    
    for i in food_data.columns:
        food_data[i] = food_data[i].str.replace('+', '.0')
        food_data[i] = food_data[i].str.replace('-', '.0')

    # change entries to be floats
    food_data = food_data.astype(float)
    # for each row, add the total grams of fat, carbs, and protein
    food_data['Total Grams'] = food_data['Fat (g)'] + food_data['Carbohydrates (g)'] + food_data['Proteins (g)']

    for i in food_data.columns:
        if (i != 'Total Grams'):
            food_data[i] = food_data[i] * (100 / food_data['Total Grams'])

    food_data = food_data.round(2)

    # drop the total grams column
    food_data = food_data.drop(columns=['Total Grams'])
    food_data = food_data.dropna()
    return food_data

In [7]:
df_food = format_food_data(food_data_dinner)
df_food.to_csv('nu_foods_di.csv')

  food_data[i] = food_data[i].str.replace('+', '.0')
