In [11]:
# Step 1: Import libraries
import pandas as pd  # For data manipulation and analysis
import numpy as np   # For numerical operations and array handling
import random        # For generating random numbers
from faker import Faker  # For creating fake data like names, addresses, etc.

In [12]:
# Initialize Faker for generating fake data
fake = Faker()

# Step 2: Define base lists

# Dictionary mapping product categories to their respective items
categories = {
    "Furniture": ["Office Chair", "Study Table", "Sofa", "Bookshelf", "Dining Table"],
    "Office Supplies": ["Pen", "Notebook", "Stapler", "File Folder", "Calculator"],
    "Electronics": ["Laptop", "Keyboard", "Mouse", "Headphones", "Monitor"],
    "Grocery": ["Rice Bag", "Cooking Oil", "Sugar", "Snacks", "Juice Pack"]
}

# List of geographical regions for data distribution
regions = ["North", "South", "East", "West"]

# Available payment methods for transactions
payment_modes = ["Cash", "Credit Card", "UPI", "Net Banking"]

# Possible statuses for order delivery
delivery_status = ["Delivered", "Pending", "Returned", "Cancelled"]

# Customer types for market segmentation
customer_segments = ["Consumer", "Corporate", "Home Office"]

In [13]:
# Step 3: Generate dataset

records = [] # Empty list to store all rows

for i in range(1000): # 1000 fake orders
    # Generate unique order ID
    order_id = f"ORD{1000 + i}"
    # Create random order date within last 2 years
    order_date = fake.date_between(start_date='-2y', end_date='today')
    # Set ship date 1-7 days after order date
    ship_date = order_date + pd.Timedelta(days=random.randint(1,7))

    # Generate customer information
    customer_name = fake.name()
    customer_id = f"CUST{random.randint(100,999)}"
    customer_segment = random.choice(customer_segments)

    # Generate product information
    category = random.choice(list(categories.keys()))
    product_name = random.choice(categories[category])
    product_id = f"PROD{random.randint(1000, 9999)}"

    # Generate location information
    region = random.choice(regions)
    state = fake.state()
    city = fake.city()

    # Generate order details
    quantity = random.randint(1, 10)
    unit_price = random.randint(100, 5000)
    discount = random.choice([0, 5, 10, 15, 20])

    # Calculate financial metrics
    sales_amount = quantity * unit_price * (1 - discount / 100)  # Fixed: added missing '*'
    cost_price = sales_amount * random.uniform(0.6, 0.9)  # Fixed: changed '.' to '*'
    profit = sales_amount - cost_price

    # Generate inventory information
    stock_left = random.randint(0, 50)

    # Determine reorder status based on stock level
    if stock_left < 10:
        auto_reorder = "Yes"
        reorder_quantity = random.randint(20, 50)
    else:
        auto_reorder = "No"
        reorder_quantity = 0

    # Generate supplier and delivery information
    supplier_name = fake.company()
    supplier_email = fake.company_email()
    payment_mode = random.choice(payment_modes)
    delivery = random.choice(delivery_status)

    # Append row as a dictionary
    records.append({
        "Order ID": order_id,
        "Order Date": order_date,
        "Ship Date": ship_date,
        "Customer ID": customer_id,
        "Customer Name": customer_name,
        "Customer Segment": customer_segment,
        "Product ID": product_id,
        "Product Name": product_name,
        "Category": category,
        "Region": region,
        "State": state,
        "City": city,
        "Quantity": quantity,
        "Unit Price": unit_price,
        "Discount (%)": discount,
        "Sales Amount": round(sales_amount, 2),
        "Cost Price": round(cost_price, 2),
        "Profit": round(profit, 2),
        "Payment Mode": payment_mode,
        "Delivery Status": delivery,
        "Supplier Name": supplier_name,
        "Supplier Email": supplier_email,
        "Stock Left": stock_left,
        "Auto Reorder": auto_reorder,
        "Reorder Quantity": reorder_quantity
    })

In [14]:
# Step 4: Create DataFrame and save to CSV

df = pd.DataFrame(records)  # Convert the records list to a pandas DataFrame
try:
    df.to_csv(r"C:\Users\himan\Documents\Superstore_Management_System.csv", index=False) # Save DataFrame to CSV without row indices
    print("Dataset generated successfully! File saved as 'Superstore_Management_System.csv'")
except PermissionError:  # Handle the case where the file is locked by another program
    print("Please close the file 'Superstore_Management_System.csv' if it's open in Excel or Power Bi")

Dataset generated successfully! File saved as 'Superstore_Management_System.csv'
