In [99]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import logging
import pandas as pd
import re

In [100]:
options = webdriver.ChromeOptions()
options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")

In [101]:
driver = webdriver.Chrome(service=Service(executable_path="/opt/homebrew/bin/chromedriver"), options=options)

driver

<selenium.webdriver.chrome.webdriver.WebDriver (session="f339c9c892ebc23852ca5f706d83a61e")>

In [102]:
table_rows = driver.find_elements(By.TAG_NAME, 'tr')
type(table_rows)
len(table_rows)
# table_rows


16

In [103]:
def wait_for_element(locator, locator_type, timeout):
    try:
        WebDriverWait(driver, timeout).until(
            EC.visibility_of_element_located((locator_type, locator)))
        return True
    except TimeoutException:
        return False

In [104]:
def wait_for_and_find_element(locator, locator_type, timeout):
    try:
        wait = wait_for_element(locator, locator_type, timeout)
        if not wait:
            logging.error(f'Tried to wait to locate element via locator "{locator}", but timed out')
            return False, None
        element = driver.find_element(locator_type, locator)
        if not element:
            logging.error(f'Could not find element via locator "{locator}"')
            return True, None
        if wait and element:
            logging.info(f'Found and located element via locator "{locator}"')
            return True, element

    except Exception as NoSuchElementException:
        logging.exception(f'An unexpected error occurred: {NoSuchElementException}')


In [105]:
def find_element_and_click(locator ,locator_type=By.CSS_SELECTOR):
    """
    Finds element and clicks it using `WebElement.click()`
    :param locator:
    :param locator_type:
    :return: Tuple(bool, WebElement)
    """
    try:
        element = driver.find_element(locator_type, locator)
        if not element:
            logging.error(f'Could not locate element via locator "{locator}".')
        element.click()
        return True, element
    except NoSuchElementException:
        logging.exception(f'Element {locator} was not found.')
        return False, None
    except Exception as e:
        logging.exception(f'Error occurred when trying to find and click element with locator: "{locator}" resulting in error message: {str(e)}')
        return False, None


In [106]:
# def scrape_wrapper():
#     results = []
#     for idx, table_row in enumerate(table_rows):
#         if idx >= 1:
#             table_row.click()
#             found, elem = wait_for_and_find_element(locator="//*[@class='styles__SidesheetContent-sc-czzuxh-2 hKVVOI']", locator_type=By.XPATH, timeout=10)
#             
#             results.append(elem.text)
#             
#             exit_btn_clicked = find_element_and_click(locator='//*[@id="MerchantApp"]/div/div/div[3]/div[2]/div[2]/div/div/div[1]/nav/div[1]/div[1]/div/button', locator_type=By.XPATH)
#             
#             if exit_btn_clicked:
#                 logging.info(f'Exiting sidesheetbody for Order #: {idx}')
#             
#     return results


In [110]:
def scrape_wrapper():
    results = []
    # words_to_remove = ['Dispute charge', 'Rate Dasher', 'Get help']

    for idx, table_row in enumerate(table_rows):
        if idx >= 1:
            table_row.click()
            found, elem = wait_for_and_find_element(locator="//*[@class='styles__SidesheetContent-sc-czzuxh-2 hKVVOI']", locator_type=By.XPATH, timeout=10)

            text = elem.text
            print(f'**************** text *********************\n {text} **************** text *********************\n')
            

            # Remove everything from 'Delivery Details' to 'Order Details'
            delivery_to_order_pattern = re.compile(r'Delivery Details(.*?)(?=Order Details|$)', re.DOTALL)
            text = re.sub(delivery_to_order_pattern, '', text).strip()

            # Remove words in 'words_to_remove' without affecting other words
            # for word in words_to_remove:
            #     text = re.sub(rf'\b{re.escape(word)}\b', '', text)

            results.append(text)

            exit_btn_clicked = find_element_and_click(locator='//*[@id="MerchantApp"]/div/div/div[3]/div[2]/div[2]/div/div/div[1]/nav/div[1]/div[1]/div/button', locator_type=By.XPATH)

            if exit_btn_clicked:
                logging.info(f'Exiting sidesheetbody for Order #: {idx}')

    return results


In [111]:
orders_content = scrape_wrapper()

**************** text *********************
 Order: 2ABD90DB
Delivered
The order was delivered at 11:57 PM on September 26, 2023. 
Rate Dasher
Get help
Delivery Details
Customer
Chenoa M
Dasher
Brianne
Picked Up
11:48 PM
(Quoted: 11:39 PM)
September 26, 2023
Dropped Off
11:57 PM
(Quoted: 12:00 AM)
September 26, 2023
Pick Up Location
1402 Williams Dr, Georgetown, TX 78628, USA
Channel
DoorDash
Fulfillment
DoorDash Delivery
Order Details
1
×
19 Crimes Cabernet Sauvignon 750ml Bottle (14% ABV) (Specialty Wine)
$13.79
Subtotal
$13.79
Subtotal Tax
$1.76
Commission (16%)
-$2.21
Total Customer Refund
-$0.00
Estimated Payout
$13.34
Associated Transactions (1)
Transaction #8072672500 - Delivery
$13.34 **************** text *********************
**************** text *********************
 Order: 42CEA1A6
Delivered
The order was delivered at 11:25 PM on September 26, 2023. 
Rate Dasher
Get help
Delivery Details
Customer
Chris G
Dasher
Nardeah
Picked Up
11:20 PM
(Quoted: 10:53 PM)
September 26, 2

In [112]:
orders_content

['Order: 2ABD90DB\nDelivered\nThe order was delivered at 11:57 PM on September 26, 2023. \nRate Dasher\nGet help\nDelivery Details\nCustomer\nChenoa M\nDasher\nBrianne\nPicked Up\n11:48 PM\n(Quoted: 11:39 PM)\nSeptember 26, 2023\nDropped Off\n11:57 PM\n(Quoted: 12:00 AM)\nSeptember 26, 2023\nPick Up Location\n1402 Williams Dr, Georgetown, TX 78628, USA\nChannel\nDoorDash\nFulfillment\nDoorDash Delivery\nOrder Details\n1\n×\n19 Crimes Cabernet Sauvignon 750ml Bottle (14% ABV) (Specialty Wine)\n$13.79\nSubtotal\n$13.79\nSubtotal Tax\n$1.76\nCommission (16%)\n-$2.21\nTotal Customer Refund\n-$0.00\nEstimated Payout\n$13.34\nAssociated Transactions (1)\nTransaction #8072672500 - Delivery\n$13.34',
 'Order: 42CEA1A6\nDelivered\nThe order was delivered at 11:25 PM on September 26, 2023. \nRate Dasher\nGet help\nDelivery Details\nCustomer\nChris G\nDasher\nNardeah\nPicked Up\n11:20 PM\n(Quoted: 10:53 PM)\nSeptember 26, 2023\nDropped Off\n11:25 PM\n(Quoted: 11:13 PM)\nSeptember 26, 2023\nPick U

In [94]:
orders_content_cleaned = [order.replace('\n', ' ') for order in orders_content]

In [None]:
orders_content_cleaned

In [None]:
list_of_keys = ['Order: ', 'Delivered', 'Delivery Details', 'Customer', 'Dasher', 'Picked Up', 'Dropped Off', 'Pick Up Location', 'Channel', 'Fulfillment', 'Order Details', r'\bSubtotal\b', 'Subtotal Tax', r'Commission\s*\(\d+\%\)', r'Total Customer Refund\b', 'Estimated Payout', r'Transaction #\d+', r'Associated Transactions \(\d+\)'] 

In [17]:
def get_order_id(order_text):
    # Extract the order_id
    order_id_start = order_text.find("Order:") + len("Order:")
    order_id_end = order_text.find("\n", order_id_start)
    order_id = order_text[order_id_start:order_id_end]
    return order_id

In [92]:
def orders_spreadsheet_creator(orders):
    
    # Create a Pandas Excel writer using XlsxWriter as the engine.
    with pd.ExcelWriter('output.xlsx', engine='xlsxwriter') as writer:
        
        # Loop through order_contents and create a sheet for each order
        for idx, order_text in enumerate(orders, start=1):
            
            # get order id
            order_id = get_order_id(order_text)
            
            # Convert the order text to a DataFrame
            df = pd.DataFrame(orders, columns=[f'{order_id}'])
            
            # Write the DataFrame to the Excel sheet with the order ID as the sheet name
            # TODO: store num getter func 
            df.to_excel(writer, sheet_name=f"store_num", index=False)
            
    # Close the Pandas Excel writer and save the file
    writer.save() #todo: AttributeError: 'XlsxWriter' object has no attribute 'save'


orders_spreadsheet_creator(orders_content)

AttributeError: 'XlsxWriter' object has no attribute 'save'

In [98]:
def create_excel_from_orders(order_strings):
    # Create a Pandas Excel writer using XlsxWriter as the engine.
    with pd.ExcelWriter('output.xlsx', engine='xlsxwriter') as writer:
        for idx, order_string in enumerate(order_strings, start=1):
            # Create a DataFrame with the order string
            df = pd.DataFrame({'Order Content': [order_string]})
            df.to_clipboard()
            
            
            
            # Write the DataFrame to the Excel sheet with a sheet name based on the index
            # df.to_excel(writer, sheet_name=f"Order_{idx}", index=False)
    
    # Save the Excel file

# Example usage with a list of order strings
order_strings_with_delimiter = [
    'Order: DF8E5AB6 Delivered The order was delivered at 10:56 PM on September 24, 2023.  Rate Dasher Get help...',
    'Order: 1D4B1698 Delivered The order was delivered at 7:18 PM on September 24, 2023.  Rate Dasher Get help...'
]

create_excel_from_orders(order_strings_with_delimiter)
