In [59]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import logging
import pandas as pd

In [60]:
options = webdriver.ChromeOptions()
options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")

In [61]:
driver = webdriver.Chrome(service=Service(executable_path="/opt/homebrew/bin/chromedriver"), options=options)

driver

<selenium.webdriver.chrome.webdriver.WebDriver (session="2567152ce3a5d0fa14faa77417980851")>

In [62]:
table_rows = driver.find_elements(By.TAG_NAME, 'tr')
type(table_rows)
len(table_rows)
# table_rows


15

In [63]:
def wait_for_element(locator, locator_type, timeout):
    try:
        WebDriverWait(driver, timeout).until(
            EC.visibility_of_element_located((locator_type, locator)))
        return True
    except TimeoutException:
        return False

In [64]:
def wait_for_and_find_element(locator, locator_type, timeout):
    try:
        wait = wait_for_element(locator, locator_type, timeout)
        if not wait:
            logging.error(f'Tried to wait to locate element via locator "{locator}", but timed out')
            return False, None
        element = driver.find_element(locator_type, locator)
        if not element:
            logging.error(f'Could not find element via locator "{locator}"')
            return True, None
        if wait and element:
            logging.info(f'Found and located element via locator "{locator}"')
            return True, element

    except Exception as NoSuchElementException:
        logging.exception(f'An unexpected error occurred: {NoSuchElementException}')


In [65]:
def find_element_and_click(locator ,locator_type=By.CSS_SELECTOR):
    """
    Finds element and clicks it using `WebElement.click()`
    :param locator:
    :param locator_type:
    :return: Tuple(bool, WebElement)
    """
    try:
        element = driver.find_element(locator_type, locator)
        if not element:
            logging.error(f'Could not locate element via locator "{locator}".')
        element.click()
        return True, element
    except NoSuchElementException:
        logging.exception(f'Element {locator} was not found.')
        return False, None
    except Exception as e:
        logging.exception(f'Error occurred when trying to find and click element with locator: "{locator}" resulting in error message: {str(e)}')
        return False, None


In [66]:
def scrape_wrapper():
    results = []
    for idx, table_row in enumerate(table_rows):
        if idx >= 1:
            table_row.click()
            found, elem = wait_for_and_find_element(locator="//*[@class='styles__SidesheetContent-sc-czzuxh-2 hKVVOI']", locator_type=By.XPATH, timeout=10)
            
            results.append(elem.text)
            
            exit_btn_clicked = find_element_and_click(locator='//*[@id="MerchantApp"]/div/div/div[3]/div[2]/div[2]/div/div/div[1]/nav/div[1]/div[1]/div/button', locator_type=By.XPATH)
            
            if exit_btn_clicked:
                logging.info(f'Exiting sidesheetbody for Order #: {idx}')
            
    return results


In [67]:
orders_content = scrape_wrapper()

In [68]:
orders_content

['Order: DF8E5AB6\nDelivered\nThe order was delivered at 10:56 PM on September 24, 2023. \nRate Dasher\nGet help\nDelivery Details\nCustomer\nNikolas W\nDasher\nTommy\nPicked Up\n10:35 PM\n(Quoted: 10:12 PM)\nSeptember 24, 2023\nDropped Off\n10:56 PM\n(Quoted: 10:35 PM)\nSeptember 24, 2023\nPick Up Location\n800 W Interstate 20, Big Spring, TX 79720, USA\nChannel\nDoorDash\nFulfillment\nDoorDash Delivery\nOrder Details\n1\n×\nTXB Water Bottle (16.9 oz) (Water TX)\n$1.99\n5\n×\nFountain Drink (Fountain, Frozen, & Tea)\nDrink Selection:\nDr Pepper\n$0.00\nSubtotal\n$1.99\nSubtotal Tax\n$0.00\nCommission (16%)\n-$0.32\nTotal Customer Refund\n-$0.00\nEstimated Payout\n$1.67\nAssociated Transactions (1)\nTransaction #8058359503 - Delivery\n$1.67',
 'Order: 1D4B1698\nDelivered\nThe order was delivered at 7:18 PM on September 24, 2023. \nRate Dasher\nGet help\nDelivery Details\nCustomer\nLisa C\nDasher\nEric\nPicked Up\n7:11 PM\n(Quoted: 6:57 PM)\nSeptember 24, 2023\nDropped Off\n7:18 PM\n(Qu

In [69]:
orders_content[0]

'Order: DF8E5AB6\nDelivered\nThe order was delivered at 10:56 PM on September 24, 2023. \nRate Dasher\nGet help\nDelivery Details\nCustomer\nNikolas W\nDasher\nTommy\nPicked Up\n10:35 PM\n(Quoted: 10:12 PM)\nSeptember 24, 2023\nDropped Off\n10:56 PM\n(Quoted: 10:35 PM)\nSeptember 24, 2023\nPick Up Location\n800 W Interstate 20, Big Spring, TX 79720, USA\nChannel\nDoorDash\nFulfillment\nDoorDash Delivery\nOrder Details\n1\n×\nTXB Water Bottle (16.9 oz) (Water TX)\n$1.99\n5\n×\nFountain Drink (Fountain, Frozen, & Tea)\nDrink Selection:\nDr Pepper\n$0.00\nSubtotal\n$1.99\nSubtotal Tax\n$0.00\nCommission (16%)\n-$0.32\nTotal Customer Refund\n-$0.00\nEstimated Payout\n$1.67\nAssociated Transactions (1)\nTransaction #8058359503 - Delivery\n$1.67'

In [76]:
def get_order_ids():
    
    for idx, order_text in enumerate(orders_content):
        
        # Extract the order_id
        order_id_start = orders_content[idx].find("Order:") + len("Order:")
        order_id_end = orders_content[idx].find("\n", order_id_start)
        order_id = orders_content[idx][order_id_start:order_id_end]
        
        return order_id

order_ids = get_order_ids()

' DF8E5AB6'

In [None]:
def create_per_order_sheet(orders):
    writer = pd.ExcelWriter('output.xlsx', engine='xlsxwriter')
    
    for i, orders_text in enumerate(orders):
        df = pd.DataFrame([orders_text.split('')])

In [78]:
def orders_spreadsheet_creator(lst_of_orders):
    # Create a Pandas Excel writer using XlsxWriter as the engine.
    writer = pd.ExcelWriter('output.xlsx', engine='xlsxwriter')
    
    # Loop through order_contents and create a sheet for each order
    for i, lst_of_orders in enumerate(lst_of_orders):
        # Convert the order content to a DataFrame
        df = pd.DataFrame([lst_of_orders.split('\n')], columns=["Order Content"])
        print(df)
        # Write the DataFrame to the Excel sheet
        # df.to_excel(writer, sheet_name=f"Order_{i + 1}", index=False)
    
    # Close the Pandas Excel writer and save the file
    # writer.save()


orders_spreadsheet_creator(orders_content)

ValueError: 1 columns passed, passed data had 48 columns