In [2]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)

# Define constants
NUM_ROWS = 1000
BRANDS = ['Apple', 'Samsung', 'Dell', 'HP', 'Lenovo', 'Google', 'Microsoft', 'Asus', 'Acer', 'Sony']
CATEGORIES = ['Smartphone', 'Tablet', 'Laptop']
REGIONS = ['North America', 'Europe', 'Asia', 'South America', 'Africa']
CUSTOMER_SEGMENTS = ['Individual', 'Business', 'Education']
PROMOTIONS = ['None', '10% Off', '20% Off', 'Free Shipping']

# Define real-world product series for each brand
PRODUCT_SERIES = {
    'Apple': ['iPhone 14 Pro', 'iPhone 14', 'iPhone 13', 'iPad Pro 12.9', 'iPad Air', 'MacBook Pro 16', 'MacBook Air M2'],
    'Samsung': ['Galaxy S23 Ultra', 'Galaxy S23', 'Galaxy Z Flip', 'Galaxy Tab S8', 'Galaxy Book3 Pro', 'Galaxy Book3 360'],
    'Dell': ['XPS 13 Plus', 'XPS 15', 'Inspiron 14', 'Inspiron 16', 'Alienware x16', 'Latitude 9440'],
    'HP': ['Spectre x360 14', 'Envy x360 13', 'Pavilion Aero 13', 'Omen 16', 'EliteBook 840 G9', 'ProBook 450 G9'],
    'Lenovo': ['ThinkPad X1 Carbon Gen 10', 'ThinkPad E14 Gen 4', 'Yoga 9i Gen 8', 'IdeaPad Slim 5', 'Legion 5 Pro', 'ThinkBook 14 Gen 4'],
    'Google': ['Pixel 7 Pro', 'Pixel 7', 'Pixel 6a', 'Pixel Tablet', 'Pixelbook Go'],
    'Microsoft': ['Surface Pro 9', 'Surface Laptop 5', 'Surface Go 4', 'Surface Studio 2+', 'Surface Duo 2'],
    'Asus': ['ROG Zephyrus G14', 'ROG Strix Scar 16', 'ZenBook 14X', 'VivoBook 15', 'TUF Gaming A15'],
    'Acer': ['Swift 5', 'Swift 3', 'Predator Helios 16', 'Aspire 5', 'Nitro 5'],
    'Sony': ['Xperia 1 IV', 'Xperia 5 IV', 'Xperia 10 IV', 'VAIO Z', 'VAIO SX14']
}

# Generate Sales Transactions Table
def generate_sales_transactions(num_rows):
    data = {
        'TransactionID': range(1, num_rows + 1),
        'ProductID': np.random.randint(1001, 1021, num_rows),
        'CustomerID': np.random.randint(2001, 2051, num_rows),
        'Region': np.random.choice(REGIONS, num_rows),
        'Date': [datetime(2023, 1, 1) + timedelta(days=random.randint(0, 364)) for _ in range(num_rows)],
        'Quantity': np.random.randint(1, 5, num_rows),
        'Price': np.round(np.random.uniform(200, 2000, num_rows), 2),
        'Discount': np.random.choice([0, 0.1, 0.2], num_rows),
        'Promotion': np.random.choice(PROMOTIONS, num_rows)
    }
    return pd.DataFrame(data)

# Generate Product Information Table
def generate_product_info():
    products = []
    for brand in BRANDS:
        for series in PRODUCT_SERIES[brand]:
            category = 'Smartphone' if 'iPhone' in series or 'Galaxy' in series or 'Pixel' in series or 'Xperia' in series else \
                       'Tablet' if 'iPad' in series or 'Tab' in series or 'Pixel Tablet' in series else 'Laptop'
            products.append({
                'ProductID': 1001 + len(products),
                'ProductName': series,
                'Brand': brand,
                'Category': category,
                'CostPrice': np.round(np.random.uniform(150, 1800), 2)
            })
    return pd.DataFrame(products)

# Generate Customer Information Table
def generate_customer_info():
    data = {
        'CustomerID': range(2001, 2051),
        'CustomerName': [f"Customer {i}" for i in range(2001, 2051)],
        'Region': np.random.choice(REGIONS, 50),
        'Segment': np.random.choice(CUSTOMER_SEGMENTS, 50)
    }
    return pd.DataFrame(data)

# Generate Regional Data Table
def generate_regional_data():
    data = {
        'Region': REGIONS,
        'Population (M)': np.round(np.random.uniform(50, 500, len(REGIONS)), 2),
        'GDP per Capita (USD)': np.round(np.random.uniform(1000, 50000, len(REGIONS)), 2)
    }
    return pd.DataFrame(data)

# Generate Promotions Table
def generate_promotions():
    data = {
        'PromotionID': range(1, 5),  # 4 promotions
        'PromotionName': ['10% Off', '20% Off', 'Free Shipping', 'Buy One Get One'],  # 4 promotion names
        'StartDate': [datetime(2023, 1, 1), datetime(2023, 4, 1), datetime(2023, 7, 1), datetime(2023, 10, 1)],  # 4 start dates
        'EndDate': [datetime(2023, 3, 31), datetime(2023, 6, 30), datetime(2023, 9, 30), datetime(2023, 12, 31)]  # 4 end dates
    }
    return pd.DataFrame(data)

# Generate all tables
sales_transactions = generate_sales_transactions(NUM_ROWS)
product_info = generate_product_info()
customer_info = generate_customer_info()
regional_data = generate_regional_data()
promotions = generate_promotions()

# Save to CSV (optional)
sales_transactions.to_csv('sales_transactions.csv', index=False)
product_info.to_csv('product_info.csv', index=False)
customer_info.to_csv('customer_info.csv', index=False)
regional_data.to_csv('regional_data.csv', index=False)
promotions.to_csv('promotions.csv', index=False)

# Display sample data
print("Sales Transactions:")
print(sales_transactions.head())
print("\nProduct Info:")
print(product_info.head())
print("\nCustomer Info:")
print(customer_info.head())
print("\nRegional Data:")
print(regional_data.head())
print("\nPromotions:")
print(promotions.head())

Sales Transactions:
   TransactionID  ProductID  CustomerID         Region       Date  Quantity  \
0              1       1007        2007  North America 2023-11-24         2   
1              2       1020        2018  North America 2023-02-27         4   
2              3       1015        2027  North America 2023-01-13         1   
3              4       1011        2034  North America 2023-05-21         2   
4              5       1008        2027         Africa 2023-05-06         4   

     Price  Discount Promotion  
0  1335.66       0.0   20% Off  
1  1005.81       0.1      None  
2   441.69       0.0   10% Off  
3  1924.28       0.2   20% Off  
4  1153.39       0.1   20% Off  

Product Info:
   ProductID    ProductName  Brand    Category  CostPrice
0       1001  iPhone 14 Pro  Apple  Smartphone     714.52
1       1002      iPhone 14  Apple  Smartphone    1736.56
2       1003      iPhone 13  Apple  Smartphone     884.01
3       1004  iPad Pro 12.9  Apple      Tablet     956.93
4 