In [1]:
import pandas as pd 
import numpy as np 
import os
import datetime
import random
from datetime import datetime, timedelta

In [2]:
# Warehouse sites covered by the synthetic dataset.
WAREHOUSES = [
    "Paris",
    "Lyon",
    "Marseille",
    "Berlin",
    "Madrid",
    "Barcelona",
    "Warsaw",
    "Prague",
    "Amsterdam",
    "Rome",
    "Lisbon",
    "Vienna",
    "Brussels",
]

# Product identifiers "P1000" through "P1099" (100 SKUs).
SKUS = ["P" + str(code) for code in range(1000, 1100)]

# Length of the generated history in days (1 year of data).
DAYS = 365

In [3]:
# Directory (relative to the current working directory) that the generators
# below write their CSV files into.
OUTPUT_DIR = "data"
# Create the directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def generate_dates(n_days, end_date=None):
    """Return `n_days` consecutive calendar dates as ``YYYY-MM-DD`` strings.

    The list is in ascending order and ends on `end_date` (inclusive).

    Args:
        n_days: Number of dates to produce. Zero or a negative value yields
            an empty list.
        end_date: Last date of the range, as a ``datetime`` (or ``date``)
            object. Defaults to the current local date and time.

    Returns:
        list[str]: Dates formatted with ``%Y-%m-%d``, oldest first.
    """
    # Read the clock once: calling datetime.today() for every element could
    # straddle midnight and produce a duplicated or skipped day.
    last_day = datetime.today() if end_date is None else end_date
    return [
        (last_day - timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range(n_days - 1, -1, -1)
    ]

def generate_large_demand():
    """Generate synthetic daily demand for every SKU/warehouse pair and save it as CSV.

    Each SKU gets a random base level (50-499 units) and a single sine-wave
    cycle across the DAYS window with a random amplitude (5-50); both are
    shared by all warehouses. Every (warehouse, date) observation adds
    independent Gaussian noise (mean 0, standard deviation 15), is clamped at
    zero and truncated to an integer.

    Writes ``demand.csv`` into OUTPUT_DIR with the columns
    ``sku, warehouse, date, demand_units``. Returns nothing.

    Note:
        This writes the same file as ``generate_demand`` and uses the same
        model, so calling both overwrites one result with the other.
        Values come from the global ``numpy.random`` and ``random``
        generators; this function does not seed them.
    """
    rows = []
    date_range = generate_dates(DAYS)
    for sku in SKUS:
        base = np.random.randint(50, 500)
        seasonality = np.sin(np.linspace(0, 2 * np.pi, DAYS)) * random.randint(5, 50)

        for wh in WAREHOUSES:
            # date_range holds exactly DAYS entries, so the position can index
            # the seasonal curve directly.
            for i, date in enumerate(date_range):
                noise = np.random.normal(0, 15)
                daily = base + seasonality[i] + noise
                demand = int(max(0, daily))  # no negative demand
                rows.append([sku, wh, date, demand])
    df = pd.DataFrame(rows, columns=['sku', 'warehouse', 'date', 'demand_units'])
    # Honour the configured output directory, like the other generators do,
    # instead of a hard-coded 'data/' path.
    df.to_csv(f"{OUTPUT_DIR}/demand.csv", index=False)

In [None]:
def generate_demand():
    """Write synthetic per-day demand for each SKU and warehouse to ``demand.csv``.

    For each SKU a base level (random integer in [50, 500)) and a one-period
    sine curve over DAYS points (random integer amplitude in [5, 50]) are
    drawn once and reused for all warehouses. Each daily value adds Gaussian
    noise (mean 0, standard deviation 15), is floored at zero and truncated
    to an int.

    The result is saved to ``{OUTPUT_DIR}/demand.csv`` with the columns
    ``sku, warehouse, date, demand_units``. Nothing is returned.
    """
    calendar = generate_dates(DAYS)
    records = []
    for sku in SKUS:
        level = np.random.randint(50, 500)
        seasonal_curve = np.sin(np.linspace(0, 2 * np.pi, DAYS)) * random.randint(5, 50)
        for warehouse in WAREHOUSES:
            # Walk dates and seasonal offsets in lockstep (both have DAYS entries).
            for day, seasonal in zip(calendar, seasonal_curve):
                jitter = np.random.normal(0, 15)
                units = int(max(0, level + seasonal + jitter))
                records.append((sku, warehouse, day, units))
    demand_df = pd.DataFrame(
        records,
        columns=['sku', 'warehouse', 'date', 'demand_units'],
    )
    demand_df.to_csv(f"{OUTPUT_DIR}/demand.csv", index=False)

def generate_inventory():
    """Write a synthetic stock level per SKU, warehouse and date to ``inventory.csv``.

    Every (sku, warehouse, date) combination receives an independent random
    integer stock level in [50, 600). The table is saved to
    ``{OUTPUT_DIR}/inventory.csv`` with the columns
    ``sku, warehouse, date, stock_level``. Nothing is returned.
    """
    calendar = generate_dates(DAYS)
    # Iteration order (SKU, then warehouse, then date) fixes the row order.
    records = [
        (sku, warehouse, day, np.random.randint(50, 600))
        for sku in SKUS
        for warehouse in WAREHOUSES
        for day in calendar
    ]
    inventory_df = pd.DataFrame(
        records,
        columns=['sku', 'warehouse', 'date', 'stock_level'],
    )
    inventory_df.to_csv(f"{OUTPUT_DIR}/inventory.csv", index=False)

def generate_transport_costs():
    """Write a random per-unit shipping cost for each ordered warehouse pair.

    Every (origin, destination) pair with distinct warehouses gets its own
    cost drawn uniformly from [2.5, 5.0] and rounded to two decimals, so
    A->B and B->A are drawn independently. The table is saved to
    ``{OUTPUT_DIR}/transport_costs.csv`` with the columns
    ``from_warehouse, to_warehouse, cost_per_unit``. Nothing is returned.
    """
    records = [
        (origin, destination, round(random.uniform(2.5, 5.0), 2))
        for origin in WAREHOUSES
        for destination in WAREHOUSES
        if origin != destination  # no self-shipments
    ]
    costs_df = pd.DataFrame(
        records,
        columns=['from_warehouse', 'to_warehouse', 'cost_per_unit'],
    )
    costs_df.to_csv(f"{OUTPUT_DIR}/transport_costs.csv", index=False)

def main():
    """Run the demand, inventory and transport-cost generators, then report completion."""
    pipeline = (generate_demand, generate_inventory, generate_transport_costs)
    for build_dataset in pipeline:
        build_dataset()
    print("Data generation complete CSVs saved in './data/'")

# Entry point: run the whole generation when this code is executed directly.
# In a notebook kernel __name__ is also "__main__", so running this cell
# triggers main() as well.
if __name__ == "__main__":
    main()

🔁 Generating synthetic supply chain data...
Data generation complete CSVs saved in './data/'
