In [10]:
# Cell 1: Import Libraries

# Import libraries
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta
import ollama  # Optional: for generating product descriptions
import os


# Initialize Faker
fake = Faker()

# Set random seeds for reproducibility.
# Faker draws from its own shared generator, so it has to be seeded
# separately: seeding only `random` and NumPy would leave the fake names,
# emails, addresses and dates different on every run.
np.random.seed(42)
random.seed(42)
Faker.seed(42)

# Create directories if they don't exist
os.makedirs('data', exist_ok=True)    # generated CSV files
os.makedirs('output', exist_ok=True)  # rendered HTML

In [11]:
# Cell 2: Generate Amazon Product Data


# Function to generate Amazon-style Order ID
def generate_order_id():
    """Return a random order ID in Amazon's 3-7-7 digit layout.

    Example: ``123-4567890-1234567``. The three segments are drawn
    independently from the global ``random`` generator.
    """
    prefix = random.randint(100, 999)
    middle = random.randint(1000000, 9999999)
    suffix = random.randint(1000000, 9999999)
    return "-".join(str(segment) for segment in (prefix, middle, suffix))

# Function to generate ASIN (Amazon Standard Identification Number)
def generate_asin():
    """Return a random 10-character ASIN made of uppercase letters and digits."""
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    picked = random.choices(alphabet, k=10)
    return ''.join(picked)

# Static product list: the default source of names and the fallback when
# Ollama is unavailable or returns nothing usable
product_list = [
    "Wireless Bluetooth Earbuds",
    "4K Ultra HD Smart TV",
    "Stainless Steel Kitchen Knife Set",
    "Hardcover Fiction Novel",
    "Smart Home Security Camera",
    "Portable Laptop Stand",
    "Organic Cotton Bed Sheets",
    "High-Performance Gaming Mouse",
    "Reusable Water Bottle",
    "Electric Toothbrush"
]


# Function to generate product name (optionally using Ollama)
def generate_product_name(use_ollama=False):
    """Generate a product name using Ollama or the static ``product_list``.

    Args:
        use_ollama: When True, ask the ``llama3`` model for a name first.

    Returns:
        The stripped model response when ``use_ollama`` is True and the
        response is not blank; otherwise a random entry of ``product_list``.
        The static list is also used when the Ollama call raises.
    """
    if use_ollama:
        try:
            response = ollama.generate(model='llama3', prompt='Generate a realistic Amazon product name (e.g., electronics, books, or household item). Keep it concise.')
            name = response['response'].strip()
            if name:
                return name
            # A blank reply would otherwise end up as an empty product name.
            print("Ollama returned an empty name. Falling back to static list.")
        except Exception as e:
            # Best effort: a failed model call must not abort data generation.
            print(f"Ollama error: {e}. Falling back to static list.")
    return random.choice(product_list)

In [12]:
# Cell 3: Generate Synthetic Order Data


# Function to generate synthetic orders with nested line items
def generate_orders(n=100, use_ollama=False):
    """Generate a list of synthetic Amazon orders.

    Args:
        n: Number of orders to generate.
        use_ollama: Passed to ``generate_product_name`` for every line item.

    Returns:
        A list of ``n`` dicts with the keys ``order_id``, ``order_date``,
        ``customer_name``, ``customer_email``, ``shipping_address``,
        ``items``, ``subtotal``, ``shipping_method``, ``shipping_cost``,
        ``tax``, ``total``, ``status`` and ``delivery_date``. ``items`` is a
        list of dicts with ``product_name``, ``asin``, ``price`` and
        ``quantity``. ``subtotal``, ``tax`` and ``total`` are rounded to two
        decimals. ``delivery_date`` is None unless ``status`` is
        "Delivered".
    """
    orders = []
    for _ in range(n):
        order_id = generate_order_id()
        order_date = fake.date_time_between(start_date="-1y", end_date=datetime.now())
        customer_name = fake.name()
        customer_email = fake.email()
        shipping_address = fake.address().replace("\n", ", ")
        num_items = random.randint(1, 5)

        # Generate items (dict values are evaluated in key order, so the
        # random draws happen as name, ASIN, price, quantity)
        items = []
        for _item_index in range(num_items):
            items.append({
                'product_name': generate_product_name(use_ollama),
                'asin': generate_asin(),
                'price': round(random.uniform(5.99, 199.99), 2),
                'quantity': random.randint(1, 3),
            })

        # Round to cents like tax and total; an unrounded float sum leaks
        # binary floating-point noise into the exported data.
        subtotal = round(sum(item['price'] * item['quantity'] for item in items), 2)

        # Shipping and tax
        shipping_method = random.choice(["Prime 2-Day", "Standard Shipping", "Expedited Shipping"])
        shipping_cost = 0 if shipping_method == "Prime 2-Day" else round(random.uniform(3.99, 12.99), 2)
        tax = round(subtotal * random.uniform(0.05, 0.1), 2)
        total = round(subtotal + shipping_cost + tax, 2)
        status = random.choice(["Pending", "Shipped", "Delivered"])
        # Only delivered orders get a delivery date, 1-7 days after ordering.
        delivery_date = order_date + timedelta(days=random.randint(1, 7)) if status == "Delivered" else None

        orders.append({
            'order_id': order_id,
            'order_date': order_date,
            'customer_name': customer_name,
            'customer_email': customer_email,
            'shipping_address': shipping_address,
            'items': items,
            'subtotal': subtotal,
            'shipping_method': shipping_method,
            'shipping_cost': shipping_cost,
            'tax': tax,
            'total': total,
            'status': status,
            'delivery_date': delivery_date
        })
    return orders

# Build the order dataset used by the later cells
# (use_ollama keeps its default of False, so names come from the static list)
orders = generate_orders(n=100)

In [13]:
# Cell 4: Generate Invoice (Payment) Data, Flatten Orders/Invoices, Export to CSV
def generate_invoices(orders):
    """Build one invoice dict per order.

    The invoice ID reuses the middle segment of the order ID, the invoice
    date falls 0-2 days after the order date, the billing address mirrors
    the shipping address and the payment method is chosen at random. The
    items list and the monetary fields are carried over from the order.
    """
    payment_options = ["Credit Card", "Amazon Pay", "Gift Card"]

    def build_invoice(order):
        middle_segment = order['order_id'].split('-')[1]
        # The date offset is drawn before the payment method so the sequence
        # of random draws stays the same.
        issued_on = order['order_date'] + timedelta(days=random.randint(0, 2))
        bill_to = order['shipping_address']
        paid_with = random.choice(payment_options)
        return {
            'invoice_id': f"INV-{middle_segment}",
            'order_id': order['order_id'],
            'invoice_date': issued_on,
            'customer_name': order['customer_name'],
            'billing_address': bill_to,
            'items': order['items'],
            'subtotal': order['subtotal'],
            'shipping_cost': order['shipping_cost'],
            'tax': order['tax'],
            'total': order['total'],
            'payment_method': paid_with,
        }

    return [build_invoice(order) for order in orders]

def flatten_data(data, data_type):
    """Expand each record's nested items into one flat row per item.

    ``data_type == 'orders'`` reads the order fields (``order_id``,
    ``order_date``, ``shipping_address``, optional ``status``); any other
    value reads the invoice fields (``invoice_id``, ``invoice_date``,
    ``billing_address``, ``payment_method``). Record-level values are
    repeated on every item row.
    """
    is_order = data_type == 'orders'
    # Pick the source field names once; the row layout is the same for both.
    id_key = 'order_id' if is_order else 'invoice_id'
    date_key = 'order_date' if is_order else 'invoice_date'
    address_key = 'shipping_address' if is_order else 'billing_address'

    rows = []
    for entry in data:
        for line_item in entry['items']:
            rows.append({
                'id': entry[id_key],
                'order_id': entry['order_id'],
                'date': entry[date_key],
                'customer_name': entry['customer_name'],
                'address': entry[address_key],
                'product_name': line_item['product_name'],
                'asin': line_item['asin'],
                'price': line_item['price'],
                'quantity': line_item['quantity'],
                'subtotal': entry['subtotal'],
                'shipping_cost': entry['shipping_cost'],
                'tax': entry['tax'],
                'total': entry['total'],
                # The 'status' column carries the payment method for invoices.
                'status': entry.get('status') if is_order else entry['payment_method'],
            })
    return rows

# Orders: one row per line item
orders_flat = flatten_data(orders, 'orders')
orders_df = pd.DataFrame(orders_flat)

# Invoices: derived from the orders, then flattened the same way
invoices = generate_invoices(orders)
invoices_flat = flatten_data(invoices, 'invoices')
invoices_df = pd.DataFrame(invoices_flat)

# Save both tables to CSV in the data folder (row index omitted)
orders_df.to_csv('data/orders.csv', index=False)
invoices_df.to_csv('data/invoices.csv', index=False)

In [14]:
# Cell 4a: Check DataFrame

# Preview the first five rows of each flattened table
print("Sample Orders Data:")
display(orders_df.head(5))
print("\nSample Invoices Data:")
display(invoices_df.head(5))

Sample Orders Data:


Unnamed: 0,id,order_id,date,customer_name,address,product_name,asin,price,quantity,subtotal,shipping_cost,tax,total,status
0,754-2867825-1419610,754-2867825-1419610,2024-08-24 13:48:59,Adam Davis,"34046 Montgomery Lodge, Blevinston, MD 54032",Hardcover Fiction Novel,I0Y6DPBHSA,44.56,3,540.86,5.72,28.92,575.5,Delivered
1,754-2867825-1419610,754-2867825-1419610,2024-08-24 13:48:59,Adam Davis,"34046 Montgomery Lodge, Blevinston, MD 54032",Reusable Water Bottle,PQK51FPKH1,25.82,2,540.86,5.72,28.92,575.5,Delivered
2,754-2867825-1419610,754-2867825-1419610,2024-08-24 13:48:59,Adam Davis,"34046 Montgomery Lodge, Blevinston, MD 54032",4K Ultra HD Smart TV,MMJBQE7CKW,177.77,2,540.86,5.72,28.92,575.5,Delivered
3,891-5855124-2338687,891-5855124-2338687,2025-05-02 02:45:02,Joseph Lopez,"3277 Thomas Valley Apt. 641, West Daniel, CA 1...",4K Ultra HD Smart TV,NQ4FMYZYCW,109.61,1,193.43,0.0,18.5,211.93,Delivered
4,891-5855124-2338687,891-5855124-2338687,2025-05-02 02:45:02,Joseph Lopez,"3277 Thomas Valley Apt. 641, West Daniel, CA 1...",Stainless Steel Kitchen Knife Set,QJ7YHL1C32,83.82,1,193.43,0.0,18.5,211.93,Delivered



Sample Invoices Data:


Unnamed: 0,id,order_id,date,customer_name,address,product_name,asin,price,quantity,subtotal,shipping_cost,tax,total,status
0,INV-2867825,754-2867825-1419610,2024-08-25 13:48:59,Adam Davis,"34046 Montgomery Lodge, Blevinston, MD 54032",Hardcover Fiction Novel,I0Y6DPBHSA,44.56,3,540.86,5.72,28.92,575.5,Amazon Pay
1,INV-2867825,754-2867825-1419610,2024-08-25 13:48:59,Adam Davis,"34046 Montgomery Lodge, Blevinston, MD 54032",Reusable Water Bottle,PQK51FPKH1,25.82,2,540.86,5.72,28.92,575.5,Amazon Pay
2,INV-2867825,754-2867825-1419610,2024-08-25 13:48:59,Adam Davis,"34046 Montgomery Lodge, Blevinston, MD 54032",4K Ultra HD Smart TV,MMJBQE7CKW,177.77,2,540.86,5.72,28.92,575.5,Amazon Pay
3,INV-5855124,891-5855124-2338687,2025-05-03 02:45:02,Joseph Lopez,"3277 Thomas Valley Apt. 641, West Daniel, CA 1...",4K Ultra HD Smart TV,NQ4FMYZYCW,109.61,1,193.43,0.0,18.5,211.93,Amazon Pay
4,INV-5855124,891-5855124-2338687,2025-05-03 02:45:02,Joseph Lopez,"3277 Thomas Valley Apt. 641, West Daniel, CA 1...",Stainless Steel Kitchen Knife Set,QJ7YHL1C32,83.82,1,193.43,0.0,18.5,211.93,Amazon Pay


In [15]:
# Cell 5: Populate HTML with Synthetic Data

from jinja2 import Template
from datetime import datetime, timedelta

def calculate_delivery_date(order_date):
    """Return an estimated Express delivery date, 1 or 2 days after ``order_date``."""
    transit_days = random.randint(1, 2)
    transit_time = timedelta(days=transit_days)
    return order_date + transit_time

# Load HTML template and populate with data
# Example: first row of the flattened table. The order-level fields used below
# (subtotal, shipping cost, tax, total) repeat on every item row of an order.
order = orders_df.iloc[0]
delivery_date = calculate_delivery_date(order['date'])

# The confirmation shows a delivery window. Formatting the same date on both
# sides of the dash produced a zero-length range, so the window now ends one
# day after the estimated date.
# NOTE(review): the one-day window length is an assumption; adjust it to
# whatever the template is meant to show.
delivery_window_end = delivery_date + timedelta(days=1)
date_format = '%A, %B %d, %Y'
formatted_delivery_date = f"{delivery_date.strftime(date_format)} - {delivery_window_end.strftime(date_format)}"
shipping_speed = "Express"  # Assuming Express based on the image

# Read and write as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open('amazon_order_template.html', 'r', encoding='utf-8') as f:
    html_template = Template(f.read())
html_output = html_template.render(
    order_id=order['order_id'],
    customer_name=order['customer_name'],
    delivery_date=formatted_delivery_date,
    shipping_speed=shipping_speed,
    item_subtotal=f"${order['subtotal']:.2f}",
    shipping_handling=f"${order['shipping_cost']:.2f}",
    total_before_tax=f"${order['subtotal'] + order['shipping_cost']:.2f}",
    estimated_tax=f"${order['tax']:.2f}",
    order_total=f"${order['total']:.2f}"
)

# Save HTML to output folder
with open('output/order_confirmation.html', 'w', encoding='utf-8') as f:
    f.write(html_output)
print("Generated output/order_confirmation.html")

Generated output/order_confirmation.html
