In [8]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from soup import getSoup

In [9]:
def makeURL(text):
    """Turn a dining hall display name into its URL slug.

    The name is lower-cased and every single space character becomes a hyphen
    (consecutive spaces therefore yield consecutive hyphens).
    """
    lowered = text.lower()
    return "-".join(lowered.split(" "))

# Display names of the dining halls, and their URL slugs in the same order.
dHalls = [
    "South Quad",
    "North Quad",
    "East Quad",
    "Bursley",
    "Mosher Jordan",
    "Twigs at Oxford",
    "Markley",
]
dURLS = list(map(makeURL, dHalls))
print(dURLS)

['south-quad', 'north-quad', 'east-quad', 'bursley', 'mosher-jordan', 'twigs-at-oxford', 'markley']


In [10]:
# Build the menu URL for the first hall in dURLS and load it.
# NOTE(review): getSoup comes from the local `soup` module; presumably it downloads
# the page and returns a parsed document -- confirm against that module.
hallSlug = dURLS[0]
page = f'https://dining.umich.edu/menus-locations/dining-halls/{hallSlug}/'
soup = getSoup(page)

In [75]:
# Locate the menu container, then collect the stripped text of every <h3> inside it
# (the meal names shown on the page).
menuDiv = soup.find("div", {"id": "mdining-items"})
meals = [heading.text.strip() for heading in menuDiv.find_all("h3")]
meals

['Breakfast', 'Lunch', 'Dinner']

In [78]:
# Column names of the scraped nutrition table, in output order.
NUTRITION_COLUMNS = (
    "Dining_Hall",
    "Course",
    "Menu_Item",
    "Serving_Size",
    "Calories",
    "Total_Fat",
    "Saturated_Fat",
    "Trans_Fat",
    "Cholesterol",
    "Sodium",
    "Total_Carbohydrate",
    "Dietary_Fiber",
    "Sugars",
    "Protein",
)

# Column-oriented accumulator: one independent, initially empty list per column.
nutritionDict = {column: [] for column in NUTRITION_COLUMNS}

In [70]:
def parseFact(fact_label: str):
    """Split one nutrition-fact string into a column label and its amount.

    The last whitespace-separated token is the amount; everything before it is
    the label, joined with underscores (e.g. "Total Fat 3g" -> ("Total_Fat", "3")).

    Splitting is done on any run of whitespace (spaces, newlines, non-breaking
    spaces), so irregular spacing in the scraped text cannot produce labels
    with stray underscores or swallow part of the label into the value.

    Args:
        fact_label: text of one nutrition row, e.g. "Sodium 111mg".

    Returns:
        (label, value) as strings. Trailing unit letters ("g", "mg", "mcg", ...)
        are removed from the value; the value is NOT converted to a number.
        If the last token has no non-letter part it is returned unchanged.
        An empty/blank input yields ("", "").
    """
    tokens = fact_label.split()
    if not tokens:
        return '', ''

    label = '_'.join(tokens[:-1])
    rawValue = tokens[-1]

    # drop the unit suffix; keep the raw token if nothing else would be left
    value = re.sub(r'[A-Za-z]+$', '', rawValue)
    if not value:
        value = rawValue

    return label, value

In [82]:
def fillNutrDict(menu_dict: dict, dining_hall: str, course: str, menu_item: str, nutrition_input: list): # ** DICTIONARY PASSED BY REF
    """Append one menu item's data to the column lists of menu_dict (in place).

    All parsing and validation happens before the first append, so a malformed
    nutrition_input raises without leaving the columns at different lengths.

    Args:
        menu_dict: maps column name -> list; mutated in place.
        dining_hall: value appended to the 'Dining_Hall' column.
        course: value appended to the 'Course' column.
        menu_item: value appended to the 'Menu_Item' column.
        nutrition_input: list of row strings. Row 0 must contain the serving
            size as "(<amount>g..."; row 2 must be "<word> <calories>";
            rows 4-12 are fact strings handled by parseFact, whose labels
            select the columns to append to.

    Raises:
        IndexError: nutrition_input has fewer than 13 rows, row 0 has no '(',
            or row 2 has no second space-separated token.
        KeyError: a required column or a parsed fact label is not a key of
            menu_dict.
    """

    #TODO: might have to change how input is parsed through (could be more nutrition rows, NaN data, etc.)
    #TODO: check for formatting inconsistencies (insert null values if needed)

    # serving size: the text between '(' and the first 'g' that follows it
    servingSize = nutrition_input[0].split('(')[1].split('g')[0]

    # calories: second space-separated token of row 2
    calories = nutrition_input[2].split(' ')[1]

    # (column, value) pairs for this item, in append order
    pending = [
        ('Dining_Hall', dining_hall),
        ('Course', course),
        ('Menu_Item', menu_item),
        ('Serving_Size', servingSize),
        ('Calories', calories),
    ]

    # facts from rows idx: 4-12
    pending.extend(parseFact(nutrition_input[i]) for i in range(4, 13))

    # reject unknown columns before anything is appended
    for column, _ in pending:
        if column not in menu_dict:
            raise KeyError(column)

    for column, value in pending:
        menu_dict[column].append(value)

    

In [83]:
def getMenuItems(dining_hall: str, course: str, menu_dict: dict, courseHTML):
    '''
    Fills menu dictionary with all menu items from a singular course

    Walks every <ul class="items"> under courseHTML, takes its direct <li>
    children as menu items, and hands each item's name and nutrition rows to
    fillNutrDict together with dining_hall and course.

    Items that have no <div class="item-name"> or no
    <table class="nutrition-facts"> are skipped instead of raising
    AttributeError on the missing element.

    Args:
        dining_hall: hall name recorded for every item.
        course: course/meal name recorded for every item.
        menu_dict: column dictionary passed through to fillNutrDict (mutated there).
        courseHTML: parsed HTML node exposing find_all (presumably a
            BeautifulSoup tag -- confirm against getSoup).
    '''

    # one <ul class="items"> per kitchen
    for kitchen in courseHTML.find_all("ul", { "class" : "items"}):

        # direct children only, so nested lists inside an item are not counted as items
        for itemDiv in kitchen.find_all("li", recursive = False):

            nameDiv = itemDiv.find("div", {"class": "item-name"})
            nutrDiv = itemDiv.find("table", { "class": "nutrition-facts"})

            # find() returns None when the element is absent; nothing to record then
            if nameDiv is None or nutrDiv is None:
                continue

            itemName = nameDiv.text.strip()

            # text of the first <td> of every row that has one
            firstCells = (row.find("td") for row in nutrDiv.find_all("tr"))
            nutrRows = [cell.text.strip() for cell in firstCells if cell is not None]

            # add item to dictionary
            fillNutrDict(menu_dict, dining_hall, course, itemName, nutrRows)
    



In [84]:
# Collect every <ul class="courses_wrapper"> in the menu container and scrape
# the first one into nutritionDict, labelled as South Quad / Breakfast.
courses = menuDiv.find_all("ul", {"class": "courses_wrapper"})
brek = courses[0]
getMenuItems(
    dining_hall="South Quad",
    course="Breakfast",
    menu_dict=nutritionDict,
    courseHTML=brek,
)

In [85]:
# Inspect the column lists after the getMenuItems call (every list should have the same length).
nutritionDict

{'Dining_Hall': ['South Quad',
  'South Quad',
  'South Quad',
  'South Quad',
  'South Quad',
  'South Quad',
  'South Quad',
  'South Quad',
  'South Quad',
  'South Quad'],
 'Course': ['Breakfast',
  'Breakfast',
  'Breakfast',
  'Breakfast',
  'Breakfast',
  'Breakfast',
  'Breakfast',
  'Breakfast',
  'Breakfast',
  'Breakfast'],
 'Menu_Item': ['Oatmeal',
  'Kale Saute',
  'Southwest Tofu Scramble',
  'Hash Browns',
  'Scrambled Eggs',
  'Pork Sausage Links',
  'Blueberry Buttermilk Pancakes',
  'Chocolate Chocolate Chip Muffins',
  'Blueberry Muffins',
  'Assorted Mini Danishes with Nuts'],
 'Serving_Size': ['253', '85', '95', '91', '84', '51', '71', '54', '54', '43'],
 'Calories': ['183',
  '69',
  '124',
  '92',
  '163',
  '181',
  '105',
  '176',
  '164',
  '132'],
 'Total_Fat': ['3', '4', '7', '2', '12', '18', '1', '8', '7', '7'],
 'Saturated_Fat': ['1', '0', '1', '0', '4', '6', '0', '2', '1', '3'],
 'Trans_Fat': ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0'],
 'Cholester

In [86]:
# Build the table from the column lists. Every scraped value is a string, so the
# measurement columns are converted to numbers; otherwise sorting, arithmetic and
# plotting would operate on text. Values that are not numeric become NaN.
df = pd.DataFrame(nutritionDict)
textColumns = ["Dining_Hall", "Course", "Menu_Item"]
for columnName in df.columns:
    if columnName not in textColumns:
        df[columnName] = pd.to_numeric(df[columnName], errors="coerce")

In [87]:
# Show the table built from nutritionDict (one row per scraped menu item).
df

Unnamed: 0,Dining_Hall,Course,Menu_Item,Serving_Size,Calories,Total_Fat,Saturated_Fat,Trans_Fat,Cholesterol,Sodium,Total_Carbohydrate,Dietary_Fiber,Sugars,Protein
0,South Quad,Breakfast,Oatmeal,253,183,3,1,0,0,3,33,5,1,6
1,South Quad,Breakfast,Kale Saute,85,69,4,0,0,0,111,8,2,0,3
2,South Quad,Breakfast,Southwest Tofu Scramble,95,124,7,1,0,0,282,5,2,1,11
3,South Quad,Breakfast,Hash Browns,91,92,2,0,0,0,109,17,1,0,1
4,South Quad,Breakfast,Scrambled Eggs,84,163,12,4,0,336,116,0,0,0,11
5,South Quad,Breakfast,Pork Sausage Links,51,181,18,6,0,45,340,2,0,0,9
6,South Quad,Breakfast,Blueberry Buttermilk Pancakes,71,105,1,0,0,0,434,22,1,4,3
7,South Quad,Breakfast,Chocolate Chocolate Chip Muffins,54,176,8,2,0,17,172,24,1,15,2
8,South Quad,Breakfast,Blueberry Muffins,54,164,7,1,0,17,138,23,1,13,2
9,South Quad,Breakfast,Assorted Mini Danishes with Nuts,43,132,7,3,0,10,46,15,1,4,2
