# In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Synthetic sales-transaction generator.
#
# Builds `num_records` fake sales rows from a small product catalog, loads
# them into a DataFrame (`df`), writes them to `output_path` as CSV and prints
# a short summary.

# Set random seed for reproducibility.
# NOTE: only the randomly drawn fields (product, customer, quantity, price,
# day/hour offsets, payment method, region) are reproducible. The timestamps
# are anchored to the moment the script runs, so they shift between runs.
np.random.seed(42)
random.seed(42)

# Generate 100 records (more than the required 50)
num_records = 100

# Destination CSV; the summary message below reports this same path so the
# two can never disagree.
output_path = 'custom_data.csv'

# Product catalog
products = {
    'A101': {'name': 'Wireless Headphones', 'category': 'Electronics', 'base_price': 89.99},
    'B202': {'name': 'Bluetooth Speaker', 'category': 'Electronics', 'base_price': 49.99},
    'C303': {'name': 'Smart Watch', 'category': 'Electronics', 'base_price': 199.99},
    'D404': {'name': 'Phone Case', 'category': 'Accessories', 'base_price': 19.99},
    'E505': {'name': 'USB Cable', 'category': 'Accessories', 'base_price': 9.99},
    'F606': {'name': 'Screen Protector', 'category': 'Accessories', 'base_price': 12.99}
}

# Customer regions
regions = ['North', 'South', 'East', 'West']

# Payment methods
payment_methods = ['Credit Card', 'Debit Card', 'PayPal', 'Cash']

# Built once instead of on every loop iteration; dict order is insertion
# order, so random.choice sees the same sequence as before.
product_ids = list(products)

timestamp_format = '%Y-%m-%d %H:%M:%S'

# Generate synthetic sales data
data = []
now = datetime.now()
start_date = now - timedelta(days=60)

# The order of the random.* calls below is significant: changing it would
# change the values produced for a given seed.
for i in range(num_records):
    # Choose a random product
    product_id = random.choice(product_ids)
    product = products[product_id]

    # Generate transaction details
    transaction_id = 10000 + i
    customer_id = random.randint(1000, 1999)
    quantity = random.randint(1, 3)

    # Add some price variability
    price_variation = random.uniform(0.9, 1.1)  # ±10% variation
    unit_price = round(product['base_price'] * price_variation, 2)
    amount = round(unit_price * quantity, 2)

    # Generate timestamp (spread over last 60 days)
    days_ago = random.randint(0, 60)
    transaction_date = start_date + timedelta(days=days_ago)

    # Last updated is either transaction date or later (for returns/updates),
    # but never later than the generation time: a transaction dated today
    # plus up to 48 hours would otherwise be stamped in the future.
    last_updated = min(
        transaction_date + timedelta(hours=random.randint(0, 48)),
        now,
    )

    # Add some payment methods
    payment_method = random.choice(payment_methods)

    # Add region
    region = random.choice(regions)

    data.append({
        'transaction_id': transaction_id,
        'customer_id': customer_id,
        'product_id': product_id,
        'product_name': product['name'],
        'category': product['category'],
        'quantity': quantity,
        'unit_price': unit_price,
        'amount': amount,
        'transaction_date': transaction_date.strftime(timestamp_format),
        'last_updated': last_updated.strftime(timestamp_format),
        'payment_method': payment_method,
        'region': region
    })

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv(output_path, index=False)

print(f"Generated {len(df)} records of sales data in '{output_path}'")
print("Sample data:")
print(df.head())