In [1]:
import slugify
from tqdm import tqdm

In [2]:
# Read the whole URL list into memory, then split it into one entry per line.
with open("phoenix_restaurant_urls.txt") as url_file:
    raw_url_text = url_file.read()
urls = raw_url_text.splitlines()

In [3]:
# Keep only URLs made of exactly six "/"-delimited segments, i.e. five slashes.
urls = [url for url in urls if url.count("/") == 5]

In [4]:
# Substrings that mark a URL as belonging to a store to be excluded.
# Matching is by plain substring, so short entries such as "mart" also cover
# the longer "...-mart" entries listed here.
excluded_stores = [
    "walgreens",
    "airshop",
    "convenient-food-mart",
    "liquor",
    "convenience",
    "food-mart",
    "mart",
    "groceries",
    "rite-aid",
    "mercado",
]

In [5]:
# Drop every URL that contains any of the excluded store substrings.
urls = [
    url
    for url in urls
    if all(keyword not in url for keyword in excluded_stores)
]

In [62]:
def _dotted_class_at(xpath):
    """Return the class attribute of the element at `xpath` in dotted form.

    Spaces in the attribute are replaced with "." so the result can be handed
    to the driver's class-name lookups. Returns None when the module-level
    `driver` finds no element at `xpath`.
    """
    try:
        element = driver.find_element_by_xpath(xpath)
    except NoSuchElementException:
        return None
    return element.get_attribute("class").replace(" ", ".")


def scrape_uber_eats_menu(url, recursion_depth = 0, menu_number = 1):
    """Scrape item names, prices and calories from one restaurant page.

    Uses the module-level Selenium `driver`. At `recursion_depth == 0` the
    function also follows the links to the restaurant's additional menus,
    merges everything, drops duplicate item names and writes
    `./all_cities/<slug>.csv`. At any other depth it only scrapes the menu
    selected by `menu_number` and hands the rows back to the caller.

    Parameters
    ----------
    url : str
        Page to load.
    recursion_depth : int
        0 for the top-level call; the function calls itself with 1 for each
        additional menu.
    menu_number : int
        1-based index of the `<ul>` holding the menu under the main element.

    Returns
    -------
    pandas.DataFrame or None
        The scraped rows when `recursion_depth != 0`. None for the top-level
        call (the result is written to CSV instead) and whenever the page
        lacks a restaurant name, an address, a menu, items or prices.
    """
    driver.get(url)
    sleep(1)  # give the page a moment to render before querying it

    # Best-effort: click the popup button if one is present. `Exception` rather
    # than a bare `except` so Ctrl-C still interrupts a long scraping run.
    try:
        driver.find_element_by_xpath(
            "/html/body/div[1]/div/div[4]/div/div/div[2]/div[2]/button"
            ).click()
    except Exception:
        pass

    try:
        name = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/div[2]/div/div[3]/div[3]/div[1]/div[2]/div[2]/h1"
            ).text
        print(f"Looking up {name}...")
    except Exception:
        print(f"No restaurant found at {url}")
        return

    # Pull "<city>, <ST> <5-digit zip>" out of the page text. The city group is
    # a single \w+ token, so only the last word of a multi-word city can match.
    page_text = driver.find_element_by_xpath("/html/body/div[1]/div/main").text
    match = re.search(r",\s(\w+),\s([A-Z][A-Z])\s\d\d\d\d\d", page_text)
    if match is None:
        # Without this guard `match[1]` raises TypeError and aborts the batch.
        print(f"No city/state found at {url}")
        return
    city = match[1]
    state = match[2]

    menu_xpath = f"/html/body/div[1]/div/main/div[4]/ul[{menu_number}]"
    try:
        menu = driver.find_element_by_xpath(menu_xpath)
    except NoSuchElementException:
        print(f"No menu found at {url}")
        return

    # Extra menus: collected up front, because the driver will have navigated
    # away from this page by the time they are followed.
    extra_menu_urls = []
    if recursion_depth == 0:
        try:
            extra_menus = driver.find_element_by_xpath("/html/body/div[1]/div/main/div[4]/div[1]")
            extra_menu_links = extra_menus.find_elements_by_tag_name("a")[1:]
            extra_menu_urls = [link.get_property("href") for link in extra_menu_links]
        except NoSuchElementException:
            print("Only one menu found")

    # The class names are read off the first item of the first section and
    # then reused to locate the same parts inside every item.
    first_item_xpath = menu_xpath + "/li[1]/ul/li[1]/div/div/div/div[1]"
    item_class = _dotted_class_at(first_item_xpath)
    item_name_class = _dotted_class_at(first_item_xpath + "/div[1]/h4/div")
    if item_class is None or item_name_class is None:
        print(f"No menu items found at {url}")
        return
    menu_items = menu.find_elements_by_class_name(item_class)

    # The price sits in the third child div, or in the second as a fallback.
    item_price_class = _dotted_class_at(first_item_xpath + "/div[3]/div")
    if item_price_class is None:
        item_price_class = _dotted_class_at(first_item_xpath + "/div[2]/div")
    if item_price_class is None:
        print("No prices found.")
        return

    # Optional: None means no separate calories element was found.
    item_cals_class = _dotted_class_at(
        menu_xpath + "/li[1]/ul/li/div/div/div/div[1]/div[2]/div[2]"
        )

    items_dict = {'name': [], 'price_usd': [], 'calories': []}

    for item in menu_items:

        calories = ''

        # Get item name
        try:
            item_name = item.find_element_by_class_name(item_name_class).text
            item_name = item_name.upper().strip()
        except NoSuchElementException:
            continue

        # Calories embedded in the name: split at the first " CAL " marker.
        # `partition` cannot fail when the marker occurs more than once.
        if " CAL " in item_name:
            item_name, _, calories_raw = item_name.partition(" CAL ")
            calorie_numbers = re.findall(r"\d+", calories_raw)
            if calorie_numbers:
                calories = float(calorie_numbers[-1])

        # Price: items without a numeric price are skipped entirely.
        try:
            item_price_text = item.find_element_by_class_name(item_price_class).text
            item_price_text_cleaned = item_price_text.lower().strip()
            if item_price_text_cleaned in ["customize", "unavailable", ""]:
                continue
            item_price = float(item_price_text_cleaned.replace("$", ""))
        except (NoSuchElementException, ValueError):
            continue

        # Calories from the dedicated element, only if the name had none.
        if item_cals_class is not None and calories == '':
            try:
                calories_raw = item.find_elements_by_class_name(item_cals_class)[1].text
                calories = float(re.findall(r"\d+", calories_raw)[-1])
            except (NoSuchElementException, IndexError):
                pass

        items_dict['name'].append(item_name)
        items_dict['price_usd'].append(item_price)
        items_dict['calories'].append(calories)

    if not items_dict['name']:
        print("No prices or caloric information found.")
        return

    df = pd.DataFrame(items_dict)

    df['restaurant_name'] = name.upper().strip()
    df['identifier'] = f'UBEREATS, {city.upper()}, {state}'
    # Nutrient columns are left as empty strings; nothing here fills them.
    for empty_column in ('sugars_g', 'cholesterol_mg', 'protein_g', 'fiber_g',
                         'fat_g', 'carbohydrates_g', 'sodium_mg'):
        df[empty_column] = ''

    # regex=False: the entries are literal text and "*" is not a valid
    # pattern, while the default for `regex` differs between pandas versions.
    for char in invalid_chars:
        df['name'] = df['name'].str.replace(char, "", regex=False)
        df['restaurant_name'] = df['restaurant_name'].str.replace(char, "", regex=False)

    branch_name = f"UberEATS-{city.title()}-{name.title()}"
    slug = slugify.slugify(branch_name)

    print("="*10 + "Data sample" + "="*10)
    print(df.loc[:,"name":"identifier"].head(3).to_markdown())
    print("="*30)

    if recursion_depth == 0:
        # Extra menus are numbered from 2; a failed sub-scrape returns None
        # and is left out. Concatenate once instead of once per menu.
        frames = [df]
        for extra_menu_number, new_url in enumerate(extra_menu_urls, start=2):
            df_deep = scrape_uber_eats_menu(new_url, recursion_depth = 1, menu_number = extra_menu_number)
            if df_deep is not None:
                frames.append(df_deep)
        df = pd.concat(frames)

        df = df.drop_duplicates('name')
        df.to_csv(f"./all_cities/{slug}.csv", index = False)
    else:
        return df

In [63]:
import re
from time import sleep

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Substrings removed from item and restaurant names before the data is saved.
invalid_chars = [":registered:", ":tm:", ":copyright:", "℠", "*", '"']

# Start the Firefox session used by the scraper and maximise its window.
driver = webdriver.Firefox()
driver.maximize_window()

In [64]:
# Scrape every URL from index 770 onward, with a progress bar.
# NOTE(review): 770 looks like a resume offset from an earlier partial run — confirm.
pending_urls = urls[770:]
for url in tqdm(pending_urls):
    scrape_uber_eats_menu(url)

  0%|          | 0/342 [00:00<?, ?it/s]Looking up Romeros Mexican Food...
|    | name                                   |   price_usd | calories   | restaurant_name      | identifier            |
|---:|:---------------------------------------|------------:|:-----------|:---------------------|:----------------------|
|  0 | CHILE RELLENO PLATE AND RICE AND BEANS |           7 |            | ROMEROS MEXICAN FOOD | UBEREATS, PHOENIX, AZ |
|  1 | GREEN CHILE BURRITO                    |           5 |            | ROMEROS MEXICAN FOOD | UBEREATS, PHOENIX, AZ |
|  2 | CARNITAS PLATE AND RICE AND BEANS      |           8 |            | ROMEROS MEXICAN FOOD | UBEREATS, PHOENIX, AZ |
Looking up Long Wong's...
|    | name              |   price_usd | calories   | restaurant_name   | identifier            |
|---:|:------------------|------------:|:-----------|:------------------|:----------------------|
|  0 | VALUE MEAL        |        9.29 |            | LONG WONG'S       | UBEREATS, PHOENIX, A

TypeError: scrape_uber_eats_menu() got multiple values for argument 'recursion_depth'