In [1]:
import json
from pathlib import Path
from online_retail_simulator.simulator import simulate
from online_retail_simulator.simulator_synthesizer_based import simulate_synthesizer_based

# Path to the simulator's JSON config file (adjust as needed).
# This is a relative path, so it is resolved against the kernel's working directory.
config_path = "config_synthesizer.json"

# Step 1: Run rule-based simulation to get products and sales DataFrames.
# The returned pair is unpacked as (products_df, sales_df); both frames are
# reused by the merge step later in the notebook.
products_df, sales_df = simulate(config_path, mode="rule")
# Report row counts as a quick sanity check on the simulation size.
print(f"Rule-based simulation: {len(products_df)} products, {len(sales_df)} sales")

Online Retail Simulator

Configuration:
  Seed:         42
  Products:     30
  Date Range:   2024-11-01 to 2024-11-15
  Output Dir:   demo/output_mc

Generating 30 products...
✓ Generated 30 products

Generating baseline sales records (all product-date combinations)...
✓ Generated 450 baseline records (product-date pairs)
Data saved to demo/output_mc/sdv_demo_products.json
Data saved to demo/output_mc/sdv_demo_sales.json

Summary
Total Products:     30
Product Categories: 8
Total Products:     30
Total Records:      450 (product-date pairs)
Days Simulated:     15
Total Units Sold:   617
Total Revenue:      $90,415.09

✓ Simulation complete!
Rule-based simulation: 30 products, 450 sales


In [2]:
# Step 2: Merge DataFrames for synthesizer input.
# Every sales row should match at most one product. validate="many_to_one"
# makes pandas raise MergeError if product_id is duplicated in products_df,
# instead of silently duplicating sales rows in the synthesizer's training data.
# how="left" keeps every sales row even when no product matches.
merged_df = sales_df.merge(
    products_df,
    on="product_id",
    how="left",
    validate="many_to_one",
)
print(f"Merged DataFrame: {merged_df.shape}")

# Step 3: Generate synthetic data from merged DataFrame, requesting the same
# number of rows as the merged input.
synthetic_df = simulate_synthesizer_based(merged_df, num_rows=len(merged_df))
print(f"Synthetic DataFrame: {synthetic_df.shape}")

Merged DataFrame: (450, 7)
Training SDV Synthesizer (single-table)
Synthesizer Type: gaussian_copula
Input Data: 450 rows, 7 columns

Generating 450 synthetic rows...
✓ Generated 450 synthetic rows

✓ Sampling complete!
Synthetic DataFrame: (450, 7)




In [5]:
# Preview the first five rows of the merged sales/product frame that was
# passed to the synthesizer.
merged_df.head(5)

Unnamed: 0,product_id,date,quantity,revenue,name,category,price
0,PROD0001,2024-11-01,1,152.19,T-Shirt,Clothing,152.19
1,PROD0002,2024-11-01,1,16.98,Biography,Books,16.98
2,PROD0003,2024-11-01,0,0.0,Jeans,Clothing,124.24
3,PROD0004,2024-11-01,3,557.58,Laptop,Electronics,185.86
4,PROD0005,2024-11-01,1,38.06,Novel,Books,38.06


In [9]:
# Rows per category in the synthetic sample; compare against the same counts
# for merged_df (the synthesizer's input) to see how closely they match.
synthetic_df["category"].value_counts()

category
Clothing             109
Sports & Outdoors     93
Books                 78
Toys & Games          50
Health & Beauty       37
Home & Garden         33
Food & Beverage       28
Electronics           22
Name: count, dtype: int64

In [10]:
# Rows per category in the merged input frame: the reference distribution
# for judging the synthetic sample's category counts.
merged_df["category"].value_counts()

category
Clothing             105
Books                 90
Sports & Outdoors     75
Home & Garden         45
Toys & Games          45
Electronics           30
Food & Beverage       30
Health & Beauty       30
Name: count, dtype: int64