In [3]:
import random
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

# -----------------------------
# Configuration
# -----------------------------
# Two independent random generators are used when building the records:
# NumPy's global generator (quantity, discount, COGS ratio) and the stdlib
# `random` module (manufacturer / distributor / retailer / product picks).
# Both must be seeded, otherwise the entity columns change on every run.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Number of distinct entities of each kind to generate identifiers for.
num_manufacturers = 3
num_distributors = 10
num_retailers = 50
num_products = 5

# Date range of the generated sales; both endpoints are passed to
# pd.date_range, which includes them.
start_date = datetime(2025, 9, 1)
end_date = datetime(2025, 9, 30)

# -----------------------------
# Entities
# -----------------------------
# Identifiers are numbered from 1: MF_1..MF_n, DS_1..DS_n, RT_1..RT_n,
# Product_1..Product_n, where n is the matching num_* setting.
manufacturers = [f"MF_{idx}" for idx in range(1, num_manufacturers + 1)]
distributors = [f"DS_{idx}" for idx in range(1, num_distributors + 1)]
retailers = [f"RT_{idx}" for idx in range(1, num_retailers + 1)]
products = [f"Product_{idx}" for idx in range(1, num_products + 1)]

# Base price of each product, looked up by product identifier when a
# transaction is generated.
product_prices = {
    "Product_1": 150_000,
    "Product_2": 300_000,
    "Product_3": 200_000,
    "Product_4": 250_000,
    "Product_5": 180_000,
}

# -----------------------------
# Generate Sales Records
# -----------------------------
# One list per transaction; field order must match the DataFrame columns.
records = []
dates = pd.date_range(start_date, end_date)

for day in dates:
    # Every transaction of the day carries the same ISO-formatted date string.
    day_label = day.strftime("%Y-%m-%d")

    for _ in range(2000):  # number of transactions per day
        # Parties and product are drawn with the stdlib generator.
        manufacturer = random.choice(manufacturers)
        distributor = random.choice(distributors)
        retailer = random.choice(retailers)
        product = random.choice(products)

        list_price = product_prices[product]

        # Numeric fields are drawn with NumPy's global generator.
        units = np.random.randint(1, 100)  # upper bound is exclusive: 1..99
        discount_rate = np.random.uniform(0.0, 0.2)
        unit_price = list_price * (1 - discount_rate)
        gross_value = units * unit_price
        cost_ratio = np.random.uniform(0.6, 0.9)
        cost_of_goods = gross_value * cost_ratio
        margin = gross_value - cost_of_goods

        row = [day_label, manufacturer, distributor, retailer, product, units]
        # Monetary fields and the discount percentage are rounded to 2 decimals.
        row += [
            round(value, 2)
            for value in (
                list_price,
                unit_price,
                gross_value,
                cost_of_goods,
                margin,
                discount_rate * 100,
            )
        ]
        records.append(row)

# -----------------------------
# Create DataFrame
# -----------------------------
# Column labels, listed in the same order as the fields of each record.
column_names = [
    "date", "manufacturer", "distributor", "retailer", "product",
    "quantity", "base_price", "sale_price",
    "GMV", "COGS", "profit", "discount_percent",
]
df = pd.DataFrame(records, columns=column_names)

# -----------------------------
# Save to CSV
# -----------------------------
# Write the file before announcing it, so the confirmation message is true
# and reports the path that was really written. The location matches the
# one used by the later `df.to_csv(...)` cell.
output_path = Path("../Data/supply_chain_sales.csv")
# to_csv does not create missing directories, so make sure the target exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)  # index=False: no row-index column

print(f"✅ Generated {len(df):,} rows of data")
print(f"💾 Saved as {output_path}")

# Last expression of the cell: show the first rows as the cell output.
df.head()


✅ Generated 60,000 rows of data
💾 Saved as data/supply_chain_sales.csv


Unnamed: 0,date,manufacturer,distributor,retailer,product,quantity,base_price,sale_price,GMV,COGS,profit,discount_percent
0,2025-09-01,MF_3,DS_6,RT_44,Product_2,52,300000,242957.14,12633771.36,10354616.05,2279155.32,19.01
1,2025-09-01,MF_1,DS_8,RT_32,Product_5,61,180000,158513.39,9669317.05,7094859.7,2574457.35,11.94
2,2025-09-01,MF_2,DS_4,RT_24,Product_1,87,150000,148257.49,12898401.77,11090727.44,1807674.33,1.16
3,2025-09-01,MF_3,DS_9,RT_19,Product_1,24,150000,130473.35,3131360.3,1931809.67,1199550.63,13.02
4,2025-09-01,MF_2,DS_10,RT_49,Product_4,88,250000,208377.87,18337252.38,12170466.19,6166786.19,16.65


In [4]:
# Write the generated table to disk; index=False leaves out the row index.
df.to_csv(
    path_or_buf="../Data/supply_chain_sales.csv",
    index=False,
)