# In [None]:
# Sales Forecasting Dataset Generator
# Author: João Borboni
# Description: This notebook simulates a realistic sales dataset
# for use in forecasting and explainability projects with Power BI and Python.

from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker

# Setup
# Faker instance; nothing in the visible part of this script uses `fake`.
fake = Faker()
# Seed NumPy's global random state so the np.random draws below are reproducible.
# NOTE(review): this does not seed Faker - presumably Faker.seed(...) is also
# needed if `fake` is ever used to generate output; confirm before relying on it.
np.random.seed(42)

# Configuration: month-start calendar (Jan 2022 - Dec 2023) and the
# categorical values each transaction is sampled from.
months = pd.date_range(start="2022-01-01", end="2023-12-01", freq='MS')
regions = ['North America', 'Europe', 'Asia Pacific', 'Latin America']
segments = ['Consumer', 'Corporate', 'Home Office']
categories = ['Technology', 'Office Supplies', 'Furniture']


def _simulate_transaction(month_start):
    """Draw one synthetic sales record stamped with *month_start*.

    The random draws happen in a fixed order (region, segment, category,
    units, price, discount) so that a seeded run always yields the same rows.
    """
    region = np.random.choice(regions)
    segment = np.random.choice(segments)
    category = np.random.choice(categories)
    units = np.random.randint(10, 500)  # upper bound is exclusive
    price = np.round(np.random.uniform(5, 100), 2)
    discount = np.round(np.random.choice([0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]), 2)

    return {
        'Date': month_start,
        'Region': region,
        'Segment': segment,
        'Product_Category': category,
        'Units_Sold': units,
        'Unit_Price': price,
        'Discount_Percent': discount,
        # Revenue after discount, rounded to cents
        'Revenue': round(units * price * (1 - discount), 2),
    }


# Data generation: 15 transactions per month, months in chronological order
data = [
    _simulate_transaction(month_start)
    for month_start in months
    for _ in range(15)
]

# Create DataFrame: one row per generated transaction record
df = pd.DataFrame(data)

# Save to CSV
output_path = 'data/raw/sales_forecasting_dataset.csv'
# to_csv does not create missing folders and fails if the target directory
# is absent, so make sure it exists before writing.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

print(f"Dataset generated with {len(df)} rows and saved to '{output_path}'")
# Preview of the first rows (rendered only when run as a notebook cell)
df.head()