In [77]:
from orders import orders
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import logging
import pandas as pd
import re


In [88]:
def clean_order_text(order):
    # Define the patterns in a list
    patterns = [
        re.compile(r'(Rate Dasher|Learn More|Get help).*?(?=Pick Up Location)', re.DOTALL),
        re.compile(r'Channel.*?(?=Order Details)', re.DOTALL),
        re.compile(r'Associated Transactions \(\d+\).*?(?=Transaction #\d+ - Delivery)', re.DOTALL),
        re.compile(r'Associated Transactions \(\d+\).*?(?=Transaction #\d+ - Pickup)', re.DOTALL)
        
    ]
    
    # Apply each pattern
    for pattern in patterns:
        order = re.sub(pattern, '', order).strip()
    
    # Replace newline characters with spaces
    order = order.replace('\n', ' ')
    
    return order


In [89]:
def clean_orders(orders):
    
    results = []
    
    for order in orders:
        order = clean_order_text(order)
        
        results.append(order)
    return results

In [90]:
results = clean_orders(orders)

In [92]:
results

['Order: 2ABD90DB Delivered The order was delivered at 11:57 PM on September 26, 2023.  Pick Up Location 1402 Williams Dr, Georgetown, TX 78628, USA Order Details 1 × 19 Crimes Cabernet Sauvignon 750ml Bottle (14% ABV) (Specialty Wine) $13.79 Subtotal $13.79 Subtotal Tax $1.76 Commission (16%) -$2.21 Total Customer Refund -$0.00 Estimated Payout $13.34 Transaction #8072672500 - Delivery $13.34',
 'Order: 42CEA1A6 Delivered The order was delivered at 11:25 PM on September 26, 2023.  Pick Up Location 1301 N U.S. Hwy 281, Marble Falls, TX 78654, USA Order Details 1 × Ice Breakers Mint Coolmint (1.5 oz) (Mint TX) $5.69 Subtotal $5.69 Subtotal Tax $0.73 Commission (20%) -$1.14 Total Customer Refund -$0.00 Estimated Payout $5.28 Transaction #8072574397 - Delivery $5.28',
 'Order: 6AB38CB1 Delivered The order was delivered at 11:24 PM on September 26, 2023.  Pick Up Location 1301 N U.S. Hwy 281, Marble Falls, TX 78654, USA Order Details 2 × Ice Breakers Mint Coolmint (1.5 oz) (Mint TX) $11.38

In [54]:
def parse_menu_items(price_as_value):
    # Initialize a dictionary for the nested order details
    item_name_to_item_price = {}
    
    # Regular expression pattern to match an item and its corresponding price
    pattern = re.compile(r'(.+?) (\$\d+\.\d+)')
    
    # Find all matches in the string
    matches = pattern.findall(price_as_value)
    
    # Loop through all the matches and populate the dictionary
    for item_name, item_price in matches:
        item_name_to_item_price[item_name] = item_price
    
    return item_name_to_item_price

In [67]:
def parse_order(order_str):
    # Define the known keys with their regex patterns
    known_keys = [r'\bOrder: \b', r'\bDelivered\b', r'\bPick Up Location\b', r'\bOrder Details\b', r'\bSubtotal\b(?=\s[^a-zA-Z])', r'\bSubtotal\sTax\b', r'Commission \(\d+%\)', r'\bTotal Customer Refund\b', r'\bEstimated Payout\b', r'\bAssociated Transactions \(\d+%\)', r'Transaction #\d+ - Delivery']

    # Initialize the dictionary to store our parsed values
    order_dict = {}
    
    # Find positions of known keys
    positions = []
    for key in known_keys:
        for m in re.finditer(key, order_str):
            positions.append((m.start(), m.end(), m.group()))
    
    # Sort positions by their start index
    positions.sort(key=lambda x: x[0])
    # print(positions)
    
    # Create segments based on positions
    segments = []
    
    for i in range(len(positions)):
        start = positions[i][1]
        end = positions[i + 1][0] if i + 1 < len(positions) else len(order_str)
        key = positions[i][2]
        value = order_str[start:end].strip().split(' ')[0] if 'Estimated Payout' in key else order_str[start:end].strip()
        # print(f'value: {value}')
        # print(f'key: {key}| value: {value}')
        segments.append((key, value))
    # print(f'segments: {segments}')

    # Parse segments into dictionary
    for key, value in segments:
        # print(f'key: {key}| value: {value}')
        if key == "Order Details":
            value = parse_menu_items(value)
            # print(f'*** {value}')
        order_dict[key.replace(':', '').strip()] = value
        # print(f'order_dict: {order_dict}')

    return order_dict

# Your cleaned output
cleaned_order = 'Order: ABDD1702 Delivered The order was delivered at 12:00 AM on September 26, 2023. Pick Up Location 5004 Wesley St, Greenville, TX 75402, USA Order Details 1 × Coors Original Beer Bottle (12oz x 6ct) (Domestic Beer NT) $14.99 3 × Cheers $69.99 Subtotal $14.99 Subtotal Tax $1.91 Commission (23%) -$3.45 Total Customer Refund -$0.00 Estimated Payout $13.45 Associated Transactions (1) Transaction #8065637009 - Delivery $13.45'

# Call our parse_order function and print the result
parsed_dict = parse_order(cleaned_order)
print(parsed_dict)


{'Order': 'ABDD1702', 'Delivered': 'The order was delivered at 12:00 AM on September 26, 2023.', 'Pick Up Location': '5004 Wesley St, Greenville, TX 75402, USA', 'Order Details': {'1 × Coors Original Beer Bottle (12oz x 6ct) (Domestic Beer NT)': '$14.99', ' 3 × Cheers': '$69.99'}, 'Subtotal': '$14.99', 'Subtotal Tax': '$1.91', 'Commission (23%)': '-$3.45', 'Total Customer Refund': '-$0.00', 'Estimated Payout': '$13.45', 'Transaction #8065637009 - Delivery': '$13.45'}
