In [6]:
import re
from collections import Counter

def main():
    
    # Read the CSV file with the product orders
    with open('./csv/orders.csv') as f_in:
        text = f_in.read()

    # Define the regular expression to extract all order numbers
    order_regex = r'\d+'
    orders = re.findall(order_regex, text)

    # Define the regular expression to extract product names (assuming they are alphabetic or alphanumeric words)
    product_regex = r'[A-Za-z0-9 ]+'
    products = re.findall(product_regex, text)

    # Define the regular expression to extract prices (e.g. 199.99)
    price_regex = r'\d+\.\d{2}'
    prices = re.findall(price_regex, text)

    # Define the regular expression to extract dates (e.g. 2023-01-10)
    date_regex = r'\d{4}-\d{2}-\d{2}'
    dates = re.findall(date_regex, text)

    # Find orders for products priced over $500
    expensive_orders = [price for price in prices if float(price) > 500]

    # Change the date format to DD/MM/YYYY
    formatted_dates = [re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3/\2/\1', date) for date in dates]

    # Extract product names that have more than 6 characters
    long_products = [product for product in products if len(product) > 6]

    # Count the occurrences of each product
    product_counts = Counter(products)

    # Extract the orders with prices ending in .99
    prices_ending_in_99 = [price for price in prices if price.endswith('.99')]

    # Find the cheapest product
    cheapest_product = min(zip(prices, products), key=lambda x: float(x[0]))

    # Print the results
    print(f'Orders: {orders}')
    print(f'Products: {products}')
    print(f'Prices: {prices}')
    print(f'Dates: {dates}')
    print(f'Expensive orders (> $500): {expensive_orders}')
    print(f'Formatted dates (DD/MM/YYYY): {formatted_dates}')
    print(f'Products with more than 6 characters: {long_products}')
    print(f'Product counts: {product_counts}')
    print(f'Orders with prices ending in .99: {prices_ending_in_99}')
    print(f'Cheapest product: {cheapest_product}')

if __name__ == '__main__':
    main()


Orders: ['12345', '899', '99', '2023', '09', '15', '12346', '199', '23', '2023', '09', '16', '12347', '699', '99', '2023', '09', '17', '12348', '299', '99', '2023', '09', '18', '12349', '49', '99', '2023', '09', '19', '12350', '29', '99', '2023', '09', '20', '12351', '499', '49', '2023', '09', '21', '12352', '999', '99', '2023', '09', '22', '12353', '549', '29', '2023', '09', '23', '12354', '249', '57', '2023', '09', '24']
Products: ['Order ', '12345', ' Product', ' Laptop', ' Price', ' ', '899', '99', ' Date', ' 2023', '09', '15', 'Order ', '12346', ' Product', ' Headphones', ' Price', ' ', '199', '23', ' Date', ' 2023', '09', '16', 'Order ', '12347', ' Product', ' Smartphone', ' Price', ' ', '699', '99', ' Date', ' 2023', '09', '17', 'Order ', '12348', ' Product', ' Monitor', ' Price', ' ', '299', '99', ' Date', ' 2023', '09', '18', 'Order ', '12349', ' Product', ' Keyboard', ' Price', ' ', '49', '99', ' Date', ' 2023', '09', '19', 'Order ', '12350', ' Product', ' Mouse', ' Price', '