Scraping McDonald's Menu

In [8]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import os
import statsmodels.api as sm
import numpy as np
from scipy import stats
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


# Scrape the McDonald's nutrition table from Nutritionix, keep a copy of the
# rendered HTML for debugging, and write the parsed rows to a CSV file.

# Set output directory
output_dir = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data"
os.makedirs(output_dir, exist_ok=True)

# Unique file paths
mcd_source_path = os.path.join(output_dir, 'mcdonalds_page_source.html')
mcd_csv_path = os.path.join(output_dir, 'mcdonalds_nutrition.csv')

# Protein is read from cell index 10 on this page (index 9 is not used), so a
# row must contain at least this many 'col' cells before it can be parsed.
MCD_MIN_NUTRITION_CELLS = 11

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

service = Service()
driver = webdriver.Chrome(service=service, options=chrome_options)

# Tracks whether the HTML dump was actually written, so the error handler
# does not claim a file exists when the failure happened earlier.
mcd_source_saved = False

try:
    url = "https://www.nutritionix.com/mcdonalds/menu/premium"
    driver.get(url)

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Save page source
    with open(mcd_source_path, 'w', encoding='utf-8') as f:
        f.write(soup.prettify())
    mcd_source_saved = True
    print(f"Saved page source to: {mcd_source_path}")

    menu_data = []

    # Find nutrition table
    table = soup.find('table', class_='tblCompare')
    if not table:
        raise Exception("Nutrition table not found")

    rows = table.find_all('tr', class_=['odd', 'even'])

    for row in rows:
        name_cell = row.find('td', class_='al')
        if not name_cell:
            continue
        name_link = name_cell.find('a', class_='nmItem')
        item_name = name_link.text.strip() if name_link else "Unknown"

        nutrition_cells = row.find_all('td', class_='col')
        # Skip short rows instead of letting the [10] lookup below raise
        # IndexError and abort the whole scrape.
        if len(nutrition_cells) < MCD_MIN_NUTRITION_CELLS:
            continue

        menu_data.append({
            'Item': item_name,
            'Calories': nutrition_cells[0].text.strip(),
            'Total Fat (g)': nutrition_cells[1].text.strip(),
            'Saturated Fat (g)': nutrition_cells[2].text.strip(),
            'Trans Fat (g)': nutrition_cells[3].text.strip(),
            'Cholesterol (mg)': nutrition_cells[4].text.strip(),
            # Thousands separators are removed so the value can be parsed as a number later.
            'Sodium (mg)': nutrition_cells[5].text.strip().replace(',', ''),
            'Total Carbohydrates (g)': nutrition_cells[6].text.strip(),
            'Dietary Fiber (g)': nutrition_cells[7].text.strip(),
            'Sugars (g)': nutrition_cells[8].text.strip(),
            'Protein (g)': nutrition_cells[10].text.strip()
        })

    # Fail loudly rather than writing an empty CSV and reporting success.
    if not menu_data:
        raise Exception(f"No McDonald's menu items found. Check the page source: {mcd_source_path}")

    df = pd.DataFrame(menu_data)
    df.to_csv(mcd_csv_path, index=False)
    print(f" Successfully saved {len(df)} items to: {mcd_csv_path}")

except Exception as e:
    print(f"\n Error: {e}")
    screenshot_path = os.path.join(output_dir, 'mcdonalds_error_screenshot.png')
    # Capturing the screenshot is best-effort: it must not hide the original error.
    try:
        if driver.save_screenshot(screenshot_path):
            print(f"Screenshot saved to: {screenshot_path}")
        else:
            print(f"Could not write screenshot to: {screenshot_path}")
    except Exception as screenshot_error:
        print(f"Could not capture screenshot: {screenshot_error}")
    if mcd_source_saved:
        print(f"Page source saved to: {mcd_source_path}")
    else:
        print("Page source was not saved (the failure happened before it was written).")

finally:
    driver.quit()

Saved page source to: C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\mcdonalds_page_source.html
 Successfully saved 193 items to: C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\mcdonalds_nutrition.csv


Scraping Chick-fil-A Menu

In [9]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time

# Scrape the Chick-fil-A nutrition table from Nutritionix, keep a copy of the
# rendered HTML for debugging, and write the parsed rows to a CSV file.

# Configure Chrome options
chrome_options = Options()
# chrome_options.add_argument("--headless")  # Uncomment when done testing
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

service = Service()
driver = webdriver.Chrome(service=service, options=chrome_options)

# Output paths are defined before the try block so the error handler can
# always reference them, even if the failure happens while loading the page.
cfa_source_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\cfa_page_source.html"
cfa_csv_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\chick_fil_a_nutrition.csv"
cfa_screenshot_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\cfa_error_screenshot.png"

# Tracks whether the HTML dump was actually written during this run.
cfa_source_saved = False

try:
    cfa_url = "https://www.nutritionix.com/chick-fil-a/menu/premium?desktop"
    driver.get(cfa_url)

    # Wait for the table to load
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'tblCompare'))
    )

    # Scroll to bottom to load all content; stop once the document height
    # no longer changes between two consecutive scrolls.
    SCROLL_PAUSE_TIME = 1.5
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE_TIME)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Parse the fully-loaded page source
    cfa_soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Save HTML locally
    with open(cfa_source_path, 'w', encoding='utf-8') as f:
        f.write(cfa_soup.prettify())
    cfa_source_saved = True
    print(f"Saved page source to: {cfa_source_path}")

    cfa_menu_data = []

    # Locate the nutrition table
    cfa_table = cfa_soup.find('table', class_='tblCompare')
    if not cfa_table:
        raise Exception("Chick-fil-A nutrition table not found")

    cfa_rows = cfa_table.find_all('tr', class_=['odd', 'even'])

    for row in cfa_rows:
        name_cell = row.find('td', class_='al')
        if not name_cell:
            continue

        name_link = name_cell.find('a', class_='nmItem')
        cfa_item_name = name_link.text.strip() if name_link else "Unknown"

        cfa_nutrition_cells = row.find_all('td', class_='col')
        # The highest index read below is 9, so at least 10 cells are required.
        if len(cfa_nutrition_cells) < 10:
            continue

        cfa_menu_data.append({
            'Item': cfa_item_name,
            'Calories': cfa_nutrition_cells[0].text.strip(),
            'Total Fat (g)': cfa_nutrition_cells[1].text.strip(),
            'Saturated Fat (g)': cfa_nutrition_cells[2].text.strip(),
            'Trans Fat (g)': cfa_nutrition_cells[3].text.strip(),
            'Cholesterol (mg)': cfa_nutrition_cells[4].text.strip(),
            # Thousands separators are removed so the value can be parsed as a number later.
            'Sodium (mg)': cfa_nutrition_cells[5].text.strip().replace(',', ''),
            'Total Carbohydrates (g)': cfa_nutrition_cells[6].text.strip(),
            'Dietary Fiber (g)': cfa_nutrition_cells[7].text.strip(),
            'Sugars (g)': cfa_nutrition_cells[8].text.strip(),
            'Protein (g)': cfa_nutrition_cells[9].text.strip()
        })

    if not cfa_menu_data:
        raise Exception(f"No Chick-fil-A menu items found. Check the page source: {cfa_source_path}")

    cfa_df = pd.DataFrame(cfa_menu_data)
    print("\nFirst 5 Chick-fil-A items:")
    print(cfa_df.head())

    # Save the dataframe to your project directory
    cfa_df.to_csv(cfa_csv_path, index=False)
    print(f"\nSuccessfully saved {len(cfa_df)} items to: {cfa_csv_path}")

except Exception as e:
    print(f"\nError: {e}")
    # Capturing the screenshot is best-effort: it must not hide the original error.
    try:
        if driver.save_screenshot(cfa_screenshot_path):
            print(f"Screenshot saved to: {cfa_screenshot_path}")
        else:
            print(f"Could not write screenshot to: {cfa_screenshot_path}")
    except Exception as screenshot_error:
        print(f"Could not capture screenshot: {screenshot_error}")
    if cfa_source_saved:
        print(f"Page source saved to: {cfa_source_path}")
    else:
        print("Page source was not saved (the failure happened before it was written).")

finally:
    driver.quit()


Saved page source to: C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\cfa_page_source.html

First 5 Chick-fil-A items:
                                 Item Calories Total Fat (g)  \
0   4 Count Chick-Fil-A Chick-N-Minis      360            13   
1            4 Count Mini Yeast Rolls      240             8   
2  10 Count Chick-Fil-A Chick-N-Minis      910            34   
3         Bacon, Egg & Cheese Biscuit      420            23   
4          Bacon, Egg & Cheese Muffin      300            13   

  Saturated Fat (g) Trans Fat (g) Cholesterol (mg) Sodium (mg)  \
0                 4             0               60        1060   
1               2.5             0               20         450   
2                10             0              150        2640   
3                11             0              180        1220   
4                 6             0              180         780   

  Total Carbohydrates (g) Dietary Fiber (g) Sugars (g) Protein (g)  
0                    

Scraping Burger King Menu

In [10]:
# Scrape the Burger King nutrition table from Nutritionix, keep a copy of the
# rendered HTML for debugging, and write the parsed rows to a CSV file.

# Configure Chrome options
chrome_options = Options()
# chrome_options.add_argument("--headless")  # Uncomment when done testing
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

service = Service()
driver = webdriver.Chrome(service=service, options=chrome_options)

# Output paths are defined before the try block so the error handler can
# always reference them, even if the failure happens while loading the page.
bk_source_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\bk_page_source.html"
bk_csv_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\burger_king_nutrition.csv"
bk_screenshot_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\bk_error_screenshot.png"

# Tracks whether the HTML dump was actually written during this run.
bk_source_saved = False

try:
    bk_url = "https://www.nutritionix.com/burger-king/menu/premium?desktop"
    driver.get(bk_url)

    # Parse the page source
    bk_soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Save HTML locally
    with open(bk_source_path, 'w', encoding='utf-8') as f:
        f.write(bk_soup.prettify())
    bk_source_saved = True
    print(f"Saved page source to: {bk_source_path}")

    bk_menu_data = []

    # Locate the nutrition table
    bk_table = bk_soup.find('table', class_='tblCompare')
    if not bk_table:
        raise Exception("Burger King nutrition table not found")

    bk_rows = bk_table.find_all('tr', class_=['odd', 'even'])

    for row in bk_rows:
        name_cell = row.find('td', class_='al')
        if not name_cell:
            continue

        name_link = name_cell.find('a', class_='nmItem')
        bk_item_name = name_link.text.strip() if name_link else "Unknown"

        bk_nutrition_cells = row.find_all('td', class_='col')
        # The highest index read below is 9, so at least 10 cells are required.
        if len(bk_nutrition_cells) < 10:
            continue

        bk_menu_data.append({
            'Item': bk_item_name,
            'Calories': bk_nutrition_cells[0].text.strip(),
            'Total Fat (g)': bk_nutrition_cells[1].text.strip(),
            'Saturated Fat (g)': bk_nutrition_cells[2].text.strip(),
            'Trans Fat (g)': bk_nutrition_cells[3].text.strip(),
            'Cholesterol (mg)': bk_nutrition_cells[4].text.strip(),
            # Thousands separators are removed so the value can be parsed as a number later.
            'Sodium (mg)': bk_nutrition_cells[5].text.strip().replace(',', ''),
            'Total Carbohydrates (g)': bk_nutrition_cells[6].text.strip(),
            'Dietary Fiber (g)': bk_nutrition_cells[7].text.strip(),
            'Sugars (g)': bk_nutrition_cells[8].text.strip(),
            'Protein (g)': bk_nutrition_cells[9].text.strip()
        })

    if not bk_menu_data:
        raise Exception(f"No Burger King menu items found. Check the page source: {bk_source_path}")

    bk_df = pd.DataFrame(bk_menu_data)
    print("\nFirst 5 Burger King items:")
    print(bk_df.head())

    # Save the dataframe to your project directory
    bk_df.to_csv(bk_csv_path, index=False)
    print(f"\nSuccessfully saved {len(bk_df)} items to: {bk_csv_path}")

except Exception as e:
    print(f"\nError: {e}")
    # Capturing the screenshot is best-effort: it must not hide the original error.
    try:
        if driver.save_screenshot(bk_screenshot_path):
            print(f"Screenshot saved to: {bk_screenshot_path}")
        else:
            print(f"Could not write screenshot to: {bk_screenshot_path}")
    except Exception as screenshot_error:
        print(f"Could not capture screenshot: {screenshot_error}")
    if bk_source_saved:
        print(f"Page source saved to: {bk_source_path}")
    else:
        print("Page source was not saved (the failure happened before it was written).")

finally:
    driver.quit()


Saved page source to: C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\bk_page_source.html

First 5 Burger King items:
                                    Item Calories Total Fat (g)  \
0            3 Piece French Toast Sticks      350            12   
1            5 Piece French Toast Sticks      520            19   
2           Bacon, Egg, & Cheese Biscuit      450            30   
3     Bacon, Egg, & Cheese Croissan'wich      410            25   
4  Bacon, Sausage, Egg, & Cheese Biscuit      710            53   

  Saturated Fat (g) Trans Fat (g) Cholesterol (mg) Sodium (mg)  \
0                 2             0                0         220   
1                 4             0                0         350   
2                14             1              235        1580   
3                12             1              260        1030   
4                24             1              290        2390   

  Total Carbohydrates (g) Dietary Fiber (g) Sugars (g) Protein (g)  
0   

Scraping KFC Menu

In [11]:
# Scrape the KFC nutrition table from Nutritionix, keep a copy of the
# rendered HTML for debugging, and write the parsed rows to a CSV file.

# Configure Chrome options
chrome_options = Options()
# chrome_options.add_argument("--headless")  # Uncomment for headless mode
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

service = Service()
driver = webdriver.Chrome(service=service, options=chrome_options)

# Output paths are defined before the try block so the error handler can
# always reference them, even if the failure happens while loading the page.
kfc_source_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\kfc_page_source.html"
kfc_csv_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\kfc_nutrition.csv"
kfc_screenshot_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\kfc_error_screenshot.png"

# Protein is read from cell index 10 on this page (index 9 is not used), so a
# row must contain at least this many 'col' cells before it can be parsed.
KFC_MIN_NUTRITION_CELLS = 11

# Tracks whether the HTML dump was actually written during this run.
kfc_source_saved = False

try:
    kfc_url = "https://www.nutritionix.com/kfc/menu/premium?desktop"
    driver.get(kfc_url)

    kfc_soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Save HTML page source
    with open(kfc_source_path, 'w', encoding='utf-8') as f:
        f.write(kfc_soup.prettify())
    kfc_source_saved = True
    print(f"Saved page source to: {kfc_source_path}")

    kfc_menu_data = []

    kfc_table = kfc_soup.find('table', class_='tblCompare')
    if not kfc_table:
        raise Exception("KFC nutrition table not found")

    kfc_rows = kfc_table.find_all('tr', class_=['odd', 'even'])

    for row in kfc_rows:
        name_cell = row.find('td', class_='al')
        if not name_cell:
            continue

        name_link = name_cell.find('a', class_='nmItem')
        kfc_item_name = name_link.text.strip() if name_link else "Unknown"

        kfc_nutrition_cells = row.find_all('td', class_='col')
        # Skip short rows instead of letting the [10] lookup below raise
        # IndexError and abort the whole scrape.
        if len(kfc_nutrition_cells) < KFC_MIN_NUTRITION_CELLS:
            continue

        kfc_menu_data.append({
            'Item': kfc_item_name,
            'Calories': kfc_nutrition_cells[0].text.strip(),
            'Total Fat (g)': kfc_nutrition_cells[1].text.strip(),
            'Saturated Fat (g)': kfc_nutrition_cells[2].text.strip(),
            'Trans Fat (g)': kfc_nutrition_cells[3].text.strip(),
            'Cholesterol (mg)': kfc_nutrition_cells[4].text.strip(),
            # Thousands separators are removed so the value can be parsed as a number later.
            'Sodium (mg)': kfc_nutrition_cells[5].text.strip().replace(',', ''),
            'Total Carbohydrates (g)': kfc_nutrition_cells[6].text.strip(),
            'Dietary Fiber (g)': kfc_nutrition_cells[7].text.strip(),
            'Sugars (g)': kfc_nutrition_cells[8].text.strip(),
            'Protein (g)': kfc_nutrition_cells[10].text.strip()
        })

    if not kfc_menu_data:
        raise Exception(f"No KFC menu items found. Check the page source: {kfc_source_path}")

    kfc_df = pd.DataFrame(kfc_menu_data)
    print("\nFirst 5 KFC items:")
    print(kfc_df.head())

    # Save as CSV
    kfc_df.to_csv(kfc_csv_path, index=False)
    print(f"\nSuccessfully saved {len(kfc_df)} items to: {kfc_csv_path}")

except Exception as e:
    print(f"\nError: {e}")
    # Capturing the screenshot is best-effort: it must not hide the original error.
    try:
        if driver.save_screenshot(kfc_screenshot_path):
            print(f"Screenshot saved to: {kfc_screenshot_path}")
        else:
            print(f"Could not write screenshot to: {kfc_screenshot_path}")
    except Exception as screenshot_error:
        print(f"Could not capture screenshot: {screenshot_error}")
    if kfc_source_saved:
        print(f"Page source saved to: {kfc_source_path}")
    else:
        print("Page source was not saved (the failure happened before it was written).")

finally:
    driver.quit()


Saved page source to: C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\kfc_page_source.html

First 5 KFC items:
                                    Item Calories Total Fat (g)  \
0                      Apple Pie Poppers       80             5   
1                     Cherry Pie Poppers       70           4.5   
2  KFC Chicken Sandwich - Chipotle Ranch      670            38   
3       KFC Chicken Sandwich - Honey BBQ      600            26   
4    KFC Chicken Sandwich - Honey Garlic      610            26   

  Saturated Fat (g) Trans Fat (g) Cholesterol (mg) Sodium (mg)  \
0               1.5             0                0          55   
1                 0             0                0          40   
2                 5             0               90        1330   
3                 3             0               80        1330   
4                 3             0               80        1370   

  Total Carbohydrates (g) Dietary Fiber (g) Sugars (g) Protein (g)  
0          

Scraping Subway Menu

In [12]:
# Scrape the Subway nutrition table from Nutritionix, keep a copy of the
# rendered HTML for debugging, and write the parsed rows to a CSV file.

# Configure Chrome options
chrome_options = Options()
# chrome_options.add_argument("--headless")  # Enable this after testing
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

service = Service()
driver = webdriver.Chrome(service=service, options=chrome_options)

# Output paths are defined before the try block so the error handler can
# always reference them, even if the failure happens while loading the page.
subway_source_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\subway_page_source.html"
subway_csv_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\subway_nutrition.csv"
subway_screenshot_path = r"C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\subway_error_screenshot.png"

# Cells 0 and 2-10 are read on this page (index 1 is not used), so a row must
# contain at least this many 'col' cells before it can be parsed.
# NOTE(review): index 1 is presumably an extra column on the Subway table -
# confirm against the saved page source.
SUBWAY_MIN_NUTRITION_CELLS = 11

# Tracks whether the HTML dump was actually written during this run.
subway_source_saved = False

try:
    subway_url = "https://www.nutritionix.com/subway/menu/premium?desktop"
    driver.get(subway_url)

    subway_soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Save HTML page source
    with open(subway_source_path, 'w', encoding='utf-8') as f:
        f.write(subway_soup.prettify())
    subway_source_saved = True
    print(f"Saved page source to: {subway_source_path}")

    subway_menu_data = []

    subway_table = subway_soup.find('table', class_='tblCompare')
    if not subway_table:
        raise Exception("Subway nutrition table not found")

    subway_rows = subway_table.find_all('tr', class_=['odd', 'even'])

    for row in subway_rows:
        name_cell = row.find('td', class_='al')
        if not name_cell:
            continue

        name_link = name_cell.find('a', class_='nmItem')
        subway_item_name = name_link.text.strip() if name_link else "Unknown"

        subway_nutrition_cells = row.find_all('td', class_='col')
        # Skip short rows instead of letting the [10] lookup below raise
        # IndexError and abort the whole scrape.
        if len(subway_nutrition_cells) < SUBWAY_MIN_NUTRITION_CELLS:
            continue

        subway_menu_data.append({
            'Item': subway_item_name,
            'Calories': subway_nutrition_cells[0].text.strip(),
            'Total Fat (g)': subway_nutrition_cells[2].text.strip(),
            'Saturated Fat (g)': subway_nutrition_cells[3].text.strip(),
            'Trans Fat (g)': subway_nutrition_cells[4].text.strip(),
            'Cholesterol (mg)': subway_nutrition_cells[5].text.strip(),
            # Thousands separators are removed so the value can be parsed as a number later.
            'Sodium (mg)': subway_nutrition_cells[6].text.strip().replace(',', ''),
            'Total Carbohydrates (g)': subway_nutrition_cells[7].text.strip(),
            'Dietary Fiber (g)': subway_nutrition_cells[8].text.strip(),
            'Sugars (g)': subway_nutrition_cells[9].text.strip(),
            'Protein (g)': subway_nutrition_cells[10].text.strip()
        })

    if not subway_menu_data:
        raise Exception(f"No Subway menu items found. Check the page source: {subway_source_path}")

    subway_df = pd.DataFrame(subway_menu_data)
    print("\nFirst 5 Subway items:")
    print(subway_df.head())

    # Save to CSV
    subway_df.to_csv(subway_csv_path, index=False)
    print(f"\nSuccessfully saved {len(subway_df)} items to: {subway_csv_path}")

except Exception as e:
    print(f"\nError: {e}")
    # Capturing the screenshot is best-effort: it must not hide the original error.
    try:
        if driver.save_screenshot(subway_screenshot_path):
            print(f"Screenshot saved to: {subway_screenshot_path}")
        else:
            print(f"Could not write screenshot to: {subway_screenshot_path}")
    except Exception as screenshot_error:
        print(f"Could not capture screenshot: {screenshot_error}")
    if subway_source_saved:
        print(f"Page source saved to: {subway_source_path}")
    else:
        print("Page source was not saved (the failure happened before it was written).")

finally:
    driver.quit()


Saved page source to: C:\Users\asmit\OneDrive\Desktop\fast_food_nutrition\data\subway_page_source.html

First 5 Subway items:
                       Item Calories Total Fat (g) Saturated Fat (g)  \
0          6" #1 The Philly      510            25                 9   
1          6" #2 The Outlaw      490            22                 9   
2            6" #6 The Boss      690            38                16   
3  6" #10 All-American Club      540            28                10   
4        6" #11 Subway Club      500            24                 8   

  Trans Fat (g) Cholesterol (mg) Sodium (mg) Total Carbohydrates (g)  \
0             1               85        1320                      43   
1             1               90        1440                      44   
2             1               85        1860                      56   
3             1               75        1520                      45   
4             1               75        1520                      43   

  Dietar