# 01: Data Generation
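This notebook generates a synthetic regression dataset (5,000 samples, 8 features, and a target that is a linear combination of the features plus Gaussian noise) and saves it to `../output/data/training_data.csv`, i.e. the root-level `output/data/` folder, one directory above the notebooks folder.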

import os
from pathlib import Path
import numpy as np
import pandas as pd

# Standalone synthetic-data recipe: relies only on numpy/pandas, no project code.
# Seeding the global NumPy RNG makes every run produce the same dataset.
np.random.seed(42)
num_samples, num_features = 5000, 8
noise = 0.1  # scale applied to the standard-normal noise added to the target

# Feature matrix of standard-normal draws, one named column per feature.
# NOTE: X must be drawn before the noise term below; both come from the same
# seeded global stream, so swapping the draws would change the data.
X = np.random.randn(num_samples, num_features)
cols = [f"feature_{idx}" for idx in range(1, num_features + 1)]
df = pd.DataFrame(data=X, columns=cols)

# Target: linear combination of the features (coefficients evenly spaced
# from 1.0 to 2.0) plus scaled standard-normal noise.
coeffs = np.linspace(1.0, 2.0, num=num_features)
y = np.dot(X, coeffs) + np.random.randn(num_samples) * noise
df["target"] = y

# Destination: output/data one directory above the working directory
# (the parent of notebooks); created on demand.
out_dir = Path("..", "output", "data")
csv_path = out_dir.joinpath("training_data.csv")
out_dir.mkdir(parents=True, exist_ok=True)

# Write without the index so the CSV holds only the feature and target columns.
df.to_csv(csv_path, index=False)
print(f"Saved training data to {csv_path} with shape {df.shape}")