<a href="https://colab.research.google.com/github/borbonijoao/powerbi-ai-forecasting/blob/main/notebooks/dataset_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
# Sales Forecasting Dataset Generator
# Author: João Borboni
# Description: This notebook simulates a realistic sales dataset
# for use in forecasting and explainability projects with Power BI and Python.

import pandas as pd
import numpy as np
import os
from faker import Faker

# Setup
# One seed drives every random source so that Restart Kernel -> Run All
# regenerates exactly the same dataset.
SEED = 42
np.random.seed(SEED)  # legacy global NumPy RNG consumed by the np.random.* calls in this cell

# NOTE(review): `fake` is not referenced anywhere else in this cell; it is kept
# in case other code relies on it. Seeding Faker's shared generator makes any
# value later drawn from `fake` reproducible too (it does not touch NumPy's RNG).
fake = Faker()
Faker.seed(SEED)

# Configuration
# Everything a reader might want to tune lives here.
months = pd.date_range(start="2022-01-01", end="2023-12-01", freq='MS')  # 24 month-start dates
regions = ['North America', 'Europe', 'Asia Pacific', 'Latin America']
segments = ['Consumer', 'Corporate', 'Home Office']
categories = ['Technology', 'Office Supplies', 'Furniture']
TRANSACTIONS_PER_MONTH = 15
DISCOUNT_LEVELS = (0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3)


def generate_sales_records(months, regions, segments, categories,
                           transactions_per_month=TRANSACTIONS_PER_MONTH,
                           discount_levels=DISCOUNT_LEVELS):
    """Simulate sales transactions and return them as a list of dicts.

    For every entry in ``months``, ``transactions_per_month`` records are
    drawn from NumPy's global random state (``np.random``). Call
    ``np.random.seed(...)`` beforehand to get a reproducible result.

    Parameters
    ----------
    months : iterable
        Dates to generate transactions for; each value is stored unchanged
        in the record's ``'date'`` field.
    regions, segments, categories : sequence
        Candidate values; one of each is picked uniformly per record.
    transactions_per_month : int, default TRANSACTIONS_PER_MONTH
        Number of records generated per entry of ``months``.
    discount_levels : sequence of float, default DISCOUNT_LEVELS
        Candidate discount fractions (0.1 means 10% off); one is picked
        uniformly per record and used as given.

    Returns
    -------
    list of dict
        One dict per transaction with keys ``date``, ``region``, ``segment``,
        ``product_category``, ``units_sold`` (integer in [10, 499]),
        ``unit_price`` (2-decimal float drawn from [5, 100)),
        ``discount_percent`` and ``revenue`` (rounded to 2 decimals).
    """
    # Loop-invariant: build the candidate array once instead of on every draw.
    discount_options = np.asarray(discount_levels, dtype=float)

    records = []
    for month in months:
        for _ in range(transactions_per_month):
            # The order of the draws below is significant: they all consume
            # the same global RNG stream, so reordering them would change the
            # dataset produced for a given seed.
            region = np.random.choice(regions)
            segment = np.random.choice(segments)
            category = np.random.choice(categories)
            units_sold = np.random.randint(10, 500)  # upper bound is exclusive
            unit_price = np.round(np.random.uniform(5, 100), 2)
            discount_percent = np.random.choice(discount_options)
            revenue = units_sold * unit_price * (1 - discount_percent)

            records.append({
                'date': month,
                'region': region,
                'segment': segment,
                'product_category': category,
                'units_sold': units_sold,
                'unit_price': unit_price,
                'discount_percent': discount_percent,
                'revenue': round(revenue, 2)
            })
    return records


# Data generation
data = generate_sales_records(months, regions, segments, categories)

# Create DataFrame
df = pd.DataFrame(data)

# Persist the dataset as CSV
output_path = '../data/raw/sales_forecasting_dataset.csv'  # relative path: resolved against the current working directory

# The target folder may not exist yet, so create it (and any missing parents)
# before writing; exist_ok=True makes re-running the cell harmless.
output_dir, _ = os.path.split(output_path)
os.makedirs(name=output_dir, exist_ok=True)

# index=False keeps the DataFrame's row index out of the file.
df.to_csv(path_or_buf=output_path, index=False)

print(f"Dataset generated with {len(df)} rows and saved to '{output_path}'")

# Last expression of the cell: rendered as the cell output (first five rows).
df.head()

Dataset generated with 360 rows and saved to '../data/raw/sales_forecasting_dataset.csv'


Unnamed: 0,date,region,segment,product_category,units_sold,unit_price,discount_percent,revenue
0,2022-01-01,Asia Pacific,Consumer,Furniture,116,79.07,0.2,7337.7
1,2022-01-01,Asia Pacific,Corporate,Furniture,224,10.52,0.2,1885.18
2,2022-01-01,Latin America,Home Office,Office Supplies,318,97.14,0.15,26256.94
3,2022-01-01,Europe,Corporate,Office Supplies,201,99.26,0.0,19951.26
4,2022-01-01,Latin America,Corporate,Office Supplies,262,46.03,0.0,12059.86
