# TabTransformer Sales Forecasting Walkthrough

This notebook shows how to generate a synthetic Rossmann-style dataset, prepare features, train a baseline linear-regression model, and evaluate it on the test split, all through the same reusable modules that power the CLI workflow.

In [None]:
from __future__ import annotations
from pathlib import Path
import pandas as pd

from tabtransformer_sales.src import data, train, evaluate, utils
from tabtransformer_sales.src.metrics import MetricsReport

# Directory that receives the synthetic CSV written later in this notebook.
# The path is relative, so it resolves against the kernel's working directory.
RAW_DIR = Path("data") / "raw"
# Create the directory and any missing parents; harmless if it already exists.
RAW_DIR.mkdir(exist_ok=True, parents=True)


In [None]:
# Build a small synthetic Rossmann-style panel: stores 1-3, 18 consecutive days
# each, starting 2024-02-01. All values are deterministic functions of the store
# id and the day offset, so the cell is reproducible without a random seed.
start_date = pd.Timestamp("2024-02-01")
records = []
for store in range(1, 4):
    for offset in range(18):
        date = start_date + pd.Timedelta(days=offset)
        records.append(
            {
                "Store": store,
                # Take the weekday from the calendar date itself (ISO numbering:
                # 1 = Monday ... 7 = Sunday) so DayOfWeek always agrees with Date.
                # The previous `(day % 7) + 1` formula labelled Thursday
                # 2024-02-01 as day 2.
                "DayOfWeek": date.isoweekday(),
                "Date": date.strftime("%Y-%m-%d"),
                "Promo": offset % 2,  # alternates 0/1 from one day to the next
                "StateHoliday": "0",
                "SchoolHoliday": 0,
                "Customers": 120 + store * 5 + offset * 3,
                "Sales": 1000 + store * 25 + offset * 8,
                "CompetitionDistance": 400.0 + store * 10,
            }
        )
# Build the frame once from the list of records rather than growing it row by row.
synthetic_df = pd.DataFrame(records)
synth_path = RAW_DIR / "notebook_synthetic.csv"
synthetic_df.to_csv(synth_path, index=False)
synthetic_df.head()


In [None]:
# Overrides applied on top of the default YAML config for this walkthrough:
# a single epoch on CPU without AMP, and a 60/20/20 train/val/test split.
training_overrides = {"epochs": 1, "batch_size": 32, "device": "cpu", "amp": False}
split_overrides = {"train_ratio": 0.6, "val_ratio": 0.2, "test_ratio": 0.2}

# The config path is relative, so it resolves against the kernel's working directory.
config = utils.load_yaml("config/default.yaml")
# Point the raw-CSV path at the synthetic file written by the previous cell.
config["paths"]["raw_csv"] = str(synth_path)
config["training"].update(training_overrides)
config["splits"].update(split_overrides)
config


In [None]:
# Run the project's loader on the synthetic CSV using the overridden config.
# save=True is forwarded to data.load_sales_data; presumably it persists the
# prepared artifacts for the later pipeline cells — TODO confirm in the data module.
X, y, metadata = data.load_sales_data(synth_path, config, save=True)
# Display the metadata object returned alongside X and y.
metadata


In [None]:
# Train through the shared training pipeline; "linreg" is the model key passed
# to it (the linear-regression baseline this walkthrough describes).
train_result = train.train_pipeline(config, "linreg")
# Show whatever train_pipeline returned.
train_result


In [None]:
# Evaluate the "linreg" model on the test split through the shared evaluation pipeline.
notebook_metrics = evaluate.evaluate_pipeline(config, "linreg", split="test")
# Wrap the metrics in a labelled MetricsReport; as the last expression, it is the cell's output.
MetricsReport(notebook_metrics, label="Notebook Test")


This notebook reused the production modules, exercised file I/O and looping constructs, and surfaced the operator-overloaded `MetricsReport` helper for quick inspection.
