In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import time
import pandas as pd

In [2]:
def setup_driver(headless=False):
    """Create a Chrome WebDriver backed by a webdriver-manager ChromeDriver.

    Args:
        headless: When True, pass ``--headless`` to Chrome so no browser
            window is opened. Defaults to False (a visible window).

    Returns:
        The ``webdriver.Chrome`` instance. The caller is responsible for
        calling ``quit()`` on it.
    """
    # Get the path for the ChromeDriver
    driver_path = ChromeDriverManager().install()

    # Set up Chrome options
    chrome_options = webdriver.ChromeOptions()
    if headless:
        chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--disable-dev-shm-usage")

    # Create a Chrome service with the driver path
    chrome_service = Service(driver_path)

    # Initialize the Chrome WebDriver with options and service
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

In [8]:
def scrape_menu_item(driver, url, timeout=10, scroll_pause=1.0):
    """Scrape the store name and every menu item from one ordering page.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the store's ordering page.
        timeout: Seconds to wait for the store name element, and the
            element-lookup timeout used while the page is loading.
        scroll_pause: Seconds to sleep after each scroll so lazily loaded
            categories can render before they are read again.

    Returns:
        Tuple ``(store_name, item_category, item_names, item_descriptions,
        item_prices)``. The four lists are parallel, one entry per menu item;
        a description is ``None`` when the item has no description element.

    Raises:
        selenium TimeoutException: the store name did not appear in time.
        selenium NoSuchElementException: a category header, item name or
            item price is missing.
    """
    driver.get(url)
    # implicitly_wait() does not pause; it sets how long element lookups keep
    # retrying before they give up.
    driver.implicitly_wait(timeout)

    # Get the store name
    store_name_element = WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'StoreAuthHeader__StoreName-ihWYeu'))
    )
    store_name = store_name_element.text
    print(store_name)

    def loaded_category_names():
        # Names of the category sections currently present in the DOM.
        return {
            section.find_element(By.CLASS_NAME, 'MuiListSubheader-root').text
            for section in driver.find_elements(By.CSS_SELECTOR, 'div[data-index]')
        }

    # Scroll down repeatedly until no new categories are loaded
    known_categories = loaded_category_names()
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # A real pause: the page needs time to render newly loaded categories.
        time.sleep(scroll_pause)
        current_categories = loaded_category_names()
        if current_categories == known_categories:
            break
        known_categories = current_categories

    # The scroll loop has settled, so lookups no longer need to retry. This
    # keeps items without a description from stalling for the full timeout.
    driver.implicitly_wait(0)

    # Lists to store item details
    item_names = []
    item_descriptions = []
    item_prices = []
    item_category = []

    # Iterate through each category
    for category in driver.find_elements(By.CSS_SELECTOR, 'div[data-index]'):
        category_name = category.find_element(By.CLASS_NAME, 'MuiListSubheader-root').text
        print("Category Name:", category_name)

        # Find all items within the current category
        menu_items = category.find_elements(By.CSS_SELECTOR, "li[data-test-id='menuItem']")

        for item in menu_items:
            item_name = item.find_element(By.CSS_SELECTOR, "[data-test-id='menuItem-name']").text
            # The description is optional: find_elements returns an empty list
            # instead of raising, so no exception has to be swallowed.
            description_elements = item.find_elements(By.CSS_SELECTOR, "[data-test-id='menuItem-description']")
            item_description = description_elements[0].text if description_elements else None
            item_price = item.find_element(By.CSS_SELECTOR, "[data-test-id='menuItem-price']").text

            item_category.append(category_name)
            item_names.append(item_name)
            item_descriptions.append(item_description)
            item_prices.append(item_price)

            print(category_name, item_name, item_price)

        print()  # Add a newline for better readability between categories

    return store_name, item_category, item_names, item_descriptions, item_prices



In [5]:
# Load the restaurant list; the scraping loop reads each store's URL from the `link` column.
df = pd.read_json("restaurant_final.json")
df.head()

Unnamed: 0,id,link
0,restaurant-link-qoC7OL4k,https://shop.ichefpos.com/store/qoC7OL4k/order...
1,restaurant-link-9xp07WHy,https://shop.ichefpos.com/store/9xp07WHy/order...
2,restaurant-link-mVDf1UEI,https://shop.ichefpos.com/store/mVDf1UEI/order...
3,restaurant-link-yBDARR35,https://shop.ichefpos.com/store/yBDARR35/order...
4,restaurant-link-VtzGseUu,https://shop.ichefpos.com/store/VtzGseUu/order...


In [9]:
# Upper bound on how many stores are scraped in one run (the first rows of df).
MAX_STORES = 5

# Resolve the ChromeDriver binary; setup_driver() performs the same lookup internally.
driver_path = ChromeDriverManager().install()
# Initialize the Chrome driver
driver = setup_driver()
store_name_list, item_category_list, item_names_list, item_descriptions_list, item_prices_list = [], [], [], [], []
try:
    for a_store, url in enumerate(df['link'].head(MAX_STORES)):
        print(f"The {a_store} store")
        store_name, item_category, item_names, item_descriptions, item_prices = scrape_menu_item(driver, url)
        store_name_list.append(store_name)
        item_category_list.append(item_category)
        item_names_list.append(item_names)
        item_descriptions_list.append(item_descriptions)
        item_prices_list.append(item_prices)
finally:
    # Close the browser even when a scrape fails or the run is interrupted,
    # so no Chrome process is left behind.
    driver.quit()

# Create a new DataFrame to store the scraped data (one row per scraped store)
scraped_data = pd.DataFrame({
    'Store_name': store_name_list,
    'Item_category': item_category_list,
    'Item_names': item_names_list,
    'Item_descriptions': item_descriptions_list,
    'Item_prices': item_prices_list
})

# Merge the scraped data with df by index. Columns left over from a previous
# run of this cell are dropped first, so re-running does not duplicate them.
# Rows that were not scraped get NaN in the scraped columns.
df = pd.concat([df.drop(columns=scraped_data.columns, errors='ignore'), scraped_data], axis=1)


The 0 store
老妹上菜 行動餐車
Category Name: 德腸熱狗堡
德腸熱狗堡 A經典美式雙醬 ['NT$85']
德腸熱狗堡 B雙色起司嫩蛋 ['NT$85', 'NT$105']
德腸熱狗堡 C狂野莎莎你好辣 ['NT$85', 'NT$105', 'NT$135']
德腸熱狗堡 D日式香料乾咖哩 ['NT$85', 'NT$105', 'NT$135', 'NT$140']
德腸熱狗堡 E闇黑胡椒豬五花 ['NT$85', 'NT$105', 'NT$135', 'NT$140', 'NT$140']

Category Name: 點點心
點點心 薯條脆脆 ['NT$85', 'NT$105', 'NT$135', 'NT$140', 'NT$140', 'NT$50']
點點心 半糖甜甜圈 ['NT$85', 'NT$105', 'NT$135', 'NT$140', 'NT$140', 'NT$50', 'NT$20']

The 1 store
麥町吐司工房淡水新市店
Category Name: 乳酪餅
乳酪餅 花生乳酪餅 ['NT$35']
乳酪餅 奶油乳酪餅 ['NT$35', 'NT$35']
乳酪餅 草莓乳酪餅 ['NT$35', 'NT$35', 'NT$35']
乳酪餅 巧克力乳酪餅 ['NT$35', 'NT$35', 'NT$35', 'NT$35']
乳酪餅 超司乳酪餅 ['NT$35', 'NT$35', 'NT$35', 'NT$35', 'NT$40']
乳酪餅 鮪魚乳酪餅 ['NT$35', 'NT$35', 'NT$35', 'NT$35', 'NT$40', 'NT$45']
乳酪餅 培根乳酪餅 ['NT$35', 'NT$35', 'NT$35', 'NT$35', 'NT$40', 'NT$45', 'NT$40']
乳酪餅 肉鬆乳酪餅 ['NT$35', 'NT$35', 'NT$35', 'NT$35', 'NT$40', 'NT$45', 'NT$40', 'NT$40']
乳酪餅 火腿乳酪餅 ['NT$35', 'NT$35', 'NT$35', 'NT$35', 'NT$40', 'NT$45', 'NT$40', 'NT$40', 'NT$40']
乳酪餅 雪花肉乳酪餅 ['NT$35'

KeyboardInterrupt: 