In [1]:
# Developed on Python version 3.11.4

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import json
from time import sleep
from bs4 import BeautifulSoup
import pandas as pd

# Initialization of Functions

In [17]:
# Retrieves the link for each menu section

def taco_bell_menu_section_links(driver, base_url = "https://www.tacobell.com", store_location = "?store=038911#", menu_endpoint="/food"):
    """Collect the full URL of every allow-listed menu section.

    Loads ``base_url + menu_endpoint`` in the browser, reads the anchors
    inside the menu-tiles container and keeps only the hrefs that appear in
    the allow-list defined below.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        base_url: Site root; prepended to every kept href.
        store_location: Suffix appended to every returned URL
            (presumably a store-selection query string -- confirm).
        menu_endpoint: Path of the menu landing page, relative to ``base_url``.

    Returns:
        list[str]: ``base_url + href + store_location`` for each allowed
        href, in the order the anchors appear inside the container.

    Note:
        Any exception raised by ``driver.find_element`` when the container
        is not present is propagated to the caller.
    """
    driver.get(base_url + menu_endpoint)

    # Only these section paths are scraped; any other href found in the
    # container is ignored.
    permitted_sections = [
        "/food/deals-and-combos",
        "/food/tacos",
        "/food/burritos",
        "/food/quesadillas",
        "/food/nachos",
        "/food/cravings-value-menu",
        "/food/sides-sweets",
        "/food/drinks",
        "/food/power-menu",
        "/food/party-packs",
        "/food/vegetarian",
        "/food/breakfast",
    ]

    # Locate the parent div holding the section tiles, then hand its markup
    # to BeautifulSoup so the anchors can be read without further driver calls.
    tiles_container = driver.find_element(By.XPATH, '//div[contains(@class, "styles_menu-tiles__1JTJ3")]')
    tiles_markup = BeautifulSoup(tiles_container.get_attribute('outerHTML'), 'html.parser')

    section_urls = []
    for anchor in tiles_markup.find_all('a'):
        if not anchor.has_attr('href'):
            continue
        href = anchor['href']
        if href in permitted_sections:
            section_urls.append(base_url + href + store_location)

    return section_urls

In [12]:
# Function that parses the nutritional data and returns a dictionary

def nutrition_info_parsing(text):
    """Parse the text of a nutrition panel into a ``{nutrient: value}`` dict.

    The text is split into stripped, non-empty lines which are scanned in
    order:

    * A line that is exactly ``'Calories'`` takes its value from the
      following line, stored as an ``int``. If there is no following line,
      or it is not an integer, the Calories entry is skipped and scanning
      resumes at that following line (previously this raised
      ``IndexError`` / ``ValueError``).
    * A line containing one of the known nutrient keywords is recorded only
      when a following line exists and contains a unit marker. The line is
      split on whitespace: the last token is the value, the preceding tokens
      form the name. The following line is then consumed as well.
    * Every other line is ignored.

    Args:
        text: Raw multi-line text of the nutrition panel.

    Returns:
        dict: ``'Calories'`` mapped to an ``int`` (when present) and every
        other recorded nutrient name mapped to its value string, e.g.
        ``{'Calories': 180, 'Total Fat': '8g'}``.
    """
    nutrient_keywords = ('Fat', 'Cholesterol', 'Includes', 'Sugars', 'Sodium',
                         'Carbohydrates', 'Fiber', 'Protein', 'Vitamin D',
                         'Calcium', 'Iron', 'Potassium')
    # NOTE: 'mg' and 'mcg' both contain 'g', so this check is effectively
    # "the next line contains the letter g".
    unit_markers = ('g', 'mg', 'mcg')

    nutrition_lines = [line.strip() for line in text.split("\n") if line.strip()]

    nutrient_dict = {}

    i = 0
    while i < len(nutrition_lines):
        line = nutrition_lines[i]
        next_line = nutrition_lines[i + 1] if i + 1 < len(nutrition_lines) else None

        if line == 'Calories':
            calories = None
            if next_line is not None:
                try:
                    calories = int(next_line)
                except ValueError:
                    calories = None

            if calories is None:
                # No usable value: skip only the label so the following line
                # is still examined on its own.
                i += 1
            else:
                nutrient_dict['Calories'] = calories
                i += 2  # Skip the label and its value line

        elif any(word in line for word in nutrient_keywords):
            if next_line is not None and any(marker in next_line for marker in unit_markers):
                tokens = line.split()
                nutrient_name = ' '.join(tokens[:-1])  # Everything before the value
                nutrient_value = tokens[-1]  # The value is the last token, e.g. '8g'

                # A free-standing '<' (as in 'Dietary Fiber < 1g') ends up in
                # the name; move it onto the value instead.
                if '<' in nutrient_name:
                    nutrient_name = nutrient_name.replace('<', '').strip()
                    nutrient_value = '<' + nutrient_value

                nutrient_dict[nutrient_name] = nutrient_value

                i += 2  # Skip this line and the line that follows it

            else:
                i += 1  # No expected value on the next line, move on

        else:
            i += 1

    return nutrient_dict


In [13]:
# Function that parses through the allergen info data, and returns a dictionary 


def allergen_info_parsing(text):
    """Turn the allergen panel text into a ``{allergen: 0 | 1}`` dict.

    The text is split on newlines; empty pieces are dropped and the rest are
    stripped (a whitespace-only piece therefore survives as an empty
    string). The first line and the last four lines are discarded
    (presumably a heading and footer text -- confirm against the page).
    Each remaining line has surrounding commas and spaces removed.

    A line containing ``'!'`` is flagged ``1`` and stored under the line
    minus its last two characters (presumably a trailing ``' !'`` marker --
    confirm). Any other line is flagged ``0`` and stored unchanged.

    Args:
        text: Raw multi-line text of the allergen panel.

    Returns:
        dict: Allergen label mapped to ``1`` or ``0``.
    """
    # Pieces produced by split("\n") never contain a newline, so the
    # original strip('\n') filter only removes empty pieces.
    rows = [piece.strip() for piece in text.split("\n") if piece != ""]

    flags = {}
    for row in rows[1:-4]:
        label = row.strip(', ')
        flagged = '!' in label
        key = label[:-2] if flagged else label
        flags[key] = 1 if flagged else 0
    return flags

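## The next series of functions drives the scrape: collecting menu and product links, extracting each product page, and aggregating the results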

In [14]:
def get_menu_links(driver):
    """Return the menu-section URLs found on the main menu page.

    Thin wrapper that delegates to ``taco_bell_menu_section_links`` using
    that function's default arguments.

    Args:
        driver: Selenium WebDriver passed through to the section scraper.

    Returns:
        Whatever ``taco_bell_menu_section_links(driver)`` returns.
    """
    section_links = taco_bell_menu_section_links(driver)
    return section_links

def get_product_links(driver, base_url="https://www.tacobell.com"):
    """Return the product-page URLs linked from the page currently loaded.

    Parses ``driver.page_source`` and reads the href of every anchor that
    carries the product-title CSS class.

    Args:
        driver: Selenium WebDriver whose current page is a menu section.
        base_url: Site root; prepended to hrefs that start with ``'/'``.

    Returns:
        list[str]: For hrefs starting with ``'/'``, ``base_url + href + "#"``;
        any other href is returned unchanged.
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    title_anchors = soup.find_all('a', class_='styles_product-title__6KCyw')

    # Skip anchors that have no href instead of failing with KeyError; this
    # is the same guard taco_bell_menu_section_links applies.
    links = [a['href'] for a in title_anchors if a.has_attr('href')]

    return [base_url + link + "#" if link.startswith('/') else link for link in links]

def extract_data_from_product_page(driver):
    """Scrape one product page into a flat dictionary.

    Reads the item name (first non-empty ``<h1>``) and the price from the
    page source, then clicks the "Nutrition Info" link, switches into the
    frame that opens, and parses its nutrition and allergen panels.

    Args:
        driver: Selenium WebDriver whose current page is a product page.

    Returns:
        dict | None: ``{'item_name': ..., 'price': ..., <nutrients>,
        <allergens>}``, or ``None`` when the nutrition link, the frame or
        either panel cannot be found. ``item_name`` / ``price`` are ``None``
        when the page has no non-empty ``<h1>`` / no price element
        (previously these cases raised ``IndexError`` / ``AttributeError``).
    """
    subpage_soup = BeautifulSoup(driver.page_source, 'html.parser')

    non_empty_headings = [h.text for h in subpage_soup.find_all('h1') if len(h.text) != 0]
    item_name = non_empty_headings[0] if non_empty_headings else None

    # find() returns None when nothing matches, so guard before reading .text.
    price_tag = subpage_soup.find('span', class_='styles_price__3-xtw')
    price = price_tag.text if price_tag is not None else None

    try:
        nutrition_link = driver.find_element(By.LINK_TEXT, "Nutrition Info")
        nutrition_link.click()
        sleep(2)  # Fixed pause, presumably to let the nutrition frame load
        driver.switch_to.frame(driver.find_element(By.CLASS_NAME, "styles_frame__1rZvs"))
        nutrition_info = driver.find_element(By.CLASS_NAME, 'nf')
        allergen_info = driver.find_element(By.CLASS_NAME, "allergenInfo")

        return {
            'item_name': item_name,
            'price': price,
            **nutrition_info_parsing(nutrition_info.text),
            **allergen_info_parsing(allergen_info.text)
        }
    except NoSuchElementException:
        return None
    finally:
        # Leave the driver focused on the top-level document whether or not
        # the frame was entered, so later lookups are not scoped to the frame.
        driver.switch_to.default_content()

def pulling_all_data(driver, store_location="?store=038911#", base_url="https://www.tacobell.com"):
    """Scrape every product reachable from the allow-listed menu sections.

    Visits each menu section, collects its product links, opens every
    product page and extracts its data.

    Args:
        driver: Selenium WebDriver used for all navigation.
        store_location: Suffix appended to each menu-section URL. Previously
            this argument was accepted but never used, so a caller-supplied
            value had no effect.
        base_url: Site root used for both section and product links.

    Returns:
        list[dict]: One dictionary per product page that yielded data; pages
        for which ``extract_data_from_product_page`` returned a falsy value
        are skipped.
    """
    data = []

    # Call the section scraper directly so the caller's base_url and
    # store_location are honoured (get_menu_links only uses the defaults).
    menu_links = taco_bell_menu_section_links(
        driver, base_url=base_url, store_location=store_location
    )

    for section in menu_links:
        driver.get(section)
        product_links = get_product_links(driver, base_url)

        for link in product_links:
            driver.get(link)
            item_data = extract_data_from_product_page(driver)
            if item_data:
                data.append(item_data)

    return data


In [24]:
# Run the full scrape in a fresh Chrome session.
driver = webdriver.Chrome()
try:
    data = pulling_all_data(driver)
finally:
    # Always close the browser, even if the scrape raises part-way through;
    # otherwise the Chrome process is left running.
    driver.quit()


In [25]:
# Show the scraped records (one dict per product page) for a quick visual check.
data 

[{'item_name': 'Soft Taco',
  'price': '$1.79',
  'Calories': 180,
  'Total Fat': '8g',
  'Saturated Fat': '4g',
  'Trans Fat': '0g',
  'Cholesterol': '25mg',
  'Sodium': '500mg',
  'Total Carbohydrates': '18g',
  'Dietary Fiber': '3g',
  'Sugars': '1g',
  'Includes': '<1g',
  'Protein': '9g',
  'Vitamin D': '0mcg',
  'Calcium': '110mg',
  'Iron': '1.7mg',
  'Potassium': '130mg',
  'Gluten': 1,
  'Milk': 1,
  'Wheat': 1,
  'Soy': 1,
  'Eggs': 0,
  'Fish': 0,
  'Shellfish': 0,
  'Tree Nuts': 0,
  'Peanuts': 0,
  'MSG': 0,
  'Sesame': 0},
 {'item_name': 'Soft Taco Supreme®',
  'price': '$2.69',
  'Calories': 210,
  'Total Fat': '10g',
  'Saturated Fat': '5g',
  'Trans Fat': '0g',
  'Cholesterol': '25mg',
  'Sodium': '510mg',
  'Total Carbohydrates': '20g',
  'Dietary Fiber': '3g',
  'Sugars': '2g',
  'Includes': '<1g',
  'Protein': '10g',
  'Vitamin D': '0mcg',
  'Calcium': '130mg',
  'Iron': '1.7mg',
  'Potassium': '200mg',
  'Gluten': 1,
  'Milk': 1,
  'Wheat': 1,
  'Soy': 1,
  'Eggs':

In [26]:
# Build the item table from the scraped records. The nutrition parser stores
# the added-sugars figure under the key "Includes", so rename that column to
# something descriptive.
indv_items_data = pd.DataFrame(data).rename(columns={"Includes": "Added Sugars"})

# Report (rows, columns) of the resulting table.
print(indv_items_data.shape)

(112, 28)


In [27]:
# Display the item table built above.
indv_items_data

Unnamed: 0,item_name,price,Calories,Total Fat,Saturated Fat,Trans Fat,Cholesterol,Sodium,Total Carbohydrates,Dietary Fiber,...,Milk,Wheat,Soy,Eggs,Fish,Shellfish,Tree Nuts,Peanuts,MSG,Sesame
0,Soft Taco,$1.79,180,8g,4g,0g,25mg,500mg,18g,3g,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Soft Taco Supreme®,$2.69,210,10g,5g,0g,25mg,510mg,20g,3g,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Spicy Potato Soft Taco,$1.00,240,12g,3g,0g,10mg,480mg,28g,2g,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Crunchy Taco,$1.79,170,10g,3.5g,0g,25mg,300mg,13g,3g,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Crunchy Taco Supreme®,$2.69,190,11g,4.5g,0g,25mg,320mg,15g,3g,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,Breakfast Crunchwrap Sausage,$3.79,750,49g,16g,0g,145mg,1220mg,53g,4g,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
108,Hash Brown,$1.69,160,11g,1g,0g,0mg,280mg,14g,1g,...,,,,,,,,,,
109,Cinnabon Delights® 2 Pack,$2.19,170,11g,3.5g,0g,5mg,70mg,15g,<1g,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
110,Cinnabon Delights® 12 Pack,$6.89,1010,68g,22g,0g,40mg,430mg,88g,5g,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Save the uncleaned item table for later processing.
# NOTE(review): to_csv writes the DataFrame index by default, so the file gets
# an extra unnamed leading column; pass index=False here (or index_col=0 when
# reading) if that column is not wanted -- confirm what downstream code expects.
indv_items_data.to_csv("../data/uncleaned_taco_bell_indv_items.csv")

TODO next:
- Create a table for the Combo and Group meals