In [None]:
# Standard library imports
import logging
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl

# ML libraries (same as original notebook)
import xgboost as xgb
import optuna
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit

# Visualization (same as original notebook)
# NOTE(review): the wildcard import places every public lets_plot name into the
# notebook namespace; left as-is because other cells may rely on those names.
from lets_plot import *
# Presumably enables lets-plot rendering inside the notebook's HTML output —
# confirm against the lets-plot documentation.
LetsPlot.setup_html()

# Import our new framework components
from src import (
    # Core data structures
    DataConfig, TrainingConfig, GranularityLevel,
    ModelMetadata, BenchmarkModel, ModelRegistry,
    
    # Main classes
    DataLoader, FeatureEngineer, ModelTrainer,
    ModelEvaluator, VisualizationGenerator,
    
    # Pipeline orchestration
    BenchmarkPipeline
)

# Configure root logging (INFO and above, timestamped) and grab a logger for this notebook
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Emit the import banner plus library versions, one item per output line
print(
    "Framework imported successfully!",
    f"Polars version: {pl.__version__}",
    f"XGBoost version: {xgb.__version__}",
    sep="\n",
)

Framework imported successfully!
Polars version: 1.31.0
XGBoost version: 3.0.2


In [4]:
# Locations of the three input artifacts, all resolved relative to the data directory.
# Existence of these files is verified in a separate cell.
data_dir = Path("data")
features_path, target_path, mapping_path = (
    data_dir / artifact_name
    for artifact_name in (
        "train_data_features.feather",
        "train_data_target.feather",
        "feature_mapping_train.pkl",
    )
)

In [6]:

# Collect (as strings) every expected input file that is absent on disk;
# an empty list means all inputs are present.
required_paths = (features_path, target_path, mapping_path)
missing_files = [str(candidate) for candidate in required_paths if not candidate.exists()]

print(missing_files)

[]


In [7]:
# Assemble the data configuration in one place, then hand it to DataConfig.
data_config_kwargs = dict(
    # Input file locations (DataConfig is given plain strings, not Path objects)
    features_path=str(features_path),
    target_path=str(target_path),
    mapping_path=str(mapping_path),
    # Column names
    date_column="date",
    target_column="target",
    bdid_column="bdID",
    # Feature engineering configuration (matches original notebook)
    remove_not_for_sale=True,
    lag_features=list(range(1, 8)),  # lags 1 through 7, same as notebook
    calendric_features=True,
    trend_features=True,
)
data_config = DataConfig(**data_config_kwargs)

In [8]:
# Use faster configuration for demo
demo_training_config = TrainingConfig(
    validation_split=0.2,
    n_trials=50,  # Very fast for demo
    model_type="xgboost"
)

In [9]:
# Report the effective configuration: data location, input-file presence,
# configured lags, and the trial budget. Printed as one newline-joined block.
config_summary_lines = [
    "Configuration setup complete!",
    f"Data directory: {data_dir}",
    f"Files exist: Features={features_path.exists()}, Target={target_path.exists()}, Mapping={mapping_path.exists()}",
    f"Lag features: {data_config.lag_features}",
    f"Training trials: {demo_training_config.n_trials}",
]
print("\n".join(config_summary_lines))

Configuration setup complete!
Data directory: data
Files exist: Features=True, Target=True, Mapping=True
Lag features: [1, 2, 3, 4, 5, 6, 7]
Training trials: 50


In [15]:
# Build a standalone DataLoader from the shared data configuration
loader = DataLoader(data_config)

In [17]:
# Read the dataset through the loader; load_data() yields three objects, unpacked
# here as features, target, and mapping (presumably one per configured input
# file — confirm against DataLoader).
features, target, mapping = loader.load_data()

2025-07-28 16:34:47,727 - INFO - Loading M5 dataset...
2025-07-28 16:34:47,731 - INFO - Data loading completed


In [22]:
# Preview the first rows of the loaded features as this cell's displayed output
features.head()

In [None]:
# Create the benchmark pipeline from the shared data config and the demo training
# config; the pipeline is given its own output directory.
pipeline_output_dir = Path("pipeline_demo_results")
pipeline = BenchmarkPipeline(
    output_dir=pipeline_output_dir,
    training_config=demo_training_config,
    data_config=data_config,
)

In [11]:
# Have the pipeline load and prepare the dataset before any experiment is run.
# NOTE(review): the standalone loader cell elsewhere in this notebook also loads
# the data, so it may be read twice per session — confirm whether both are needed.
pipeline.load_and_prepare_data()

2025-07-27 21:40:20,121 - INFO - Loading and preparing M5 dataset...
2025-07-27 21:40:20,123 - INFO - Loading M5 dataset...
2025-07-27 21:40:50,360 - INFO - Data loading completed
2025-07-27 21:40:50,380 - INFO - Data loading completed


In [12]:
# Product ID used for the single-model demo experiment (passed as "productID"
# to the pipeline in the next cell)
demo_product_id = 80558

In [13]:
# Train one product-level model for the demo product through the pipeline
print(f"\nRunning complete pipeline for Product {demo_product_id}...")

# Entity selector identifying which product the experiment targets
demo_entity_ids = {"productID": demo_product_id}
pipeline_model = pipeline.run_single_model_experiment(
    experiment_name="pipeline_demo",
    entity_ids=demo_entity_ids,
    granularity=GranularityLevel.PRODUCT,
)

2025-07-27 21:41:31,362 - INFO - Running experiment: pipeline_demo
2025-07-27 21:41:31,429 - INFO - Creating dummy variables for 10 SKUs in this product



Running complete pipeline for Product 80558...


2025-07-27 21:41:32,302 - INFO - Creating features for product level
2025-07-27 21:41:32,337 - INFO - Created 148 features
2025-07-27 21:41:32,341 - INFO - Prepared data: 19400 samples, 148 features
2025-07-27 21:41:32,342 - INFO - Dataset prepared: 19400 samples, 148 features
2025-07-27 21:41:32,345 - INFO - Created temporal split: 15520 train, 3880 validation
2025-07-27 21:41:32,345 - INFO - Split date: 2015-05-01
2025-07-27 21:41:32,350 - INFO - Training xgboost model for product level
[I 2025-07-27 21:41:32,355] A new study created in memory with name: no-name-7892fda4-5f11-4c60-a314-7dc13c846328
[I 2025-07-27 21:41:33,932] Trial 0 finished with value: 593.9608247422681 and parameters: {'n_estimators': 165, 'max_depth': 15, 'learning_rate': 0.4133089014532649, 'subsample': 0.9597929851326465, 'colsample_bytree': 0.8100354497196494, 'reg_alpha': 1.1301871337581892, 'reg_lambda': 9.018074936276102}. Best is trial 0 with value: 593.9608247422681.
[I 2025-07-27 21:41:34,803] Trial 1 fi

In [14]:
# Evaluate every registered model via the pipeline, then save the experiment log
pipeline_results = pipeline.evaluate_all_models()
pipeline.save_experiment_log()

# Summarise each logged experiment: its size and, when recorded, its RMSE
logged_experiments = pipeline.experiment_log
if logged_experiments:
    print(f"\nExperiment Log Summary:")
    for exp in logged_experiments:
        # Two output lines per experiment, emitted with a single print call
        print(
            f"  - {exp['experiment_name']}: {exp['n_samples']} samples, {exp['n_features']} features",
            f"    Performance: RMSE {exp['performance'].get('rmse', 'N/A')}",
            sep="\n",
        )

2025-07-27 21:43:04,939 - INFO - Evaluating all models in registry
2025-07-27 21:43:04,941 - INFO - Evaluating all models at sku level
2025-07-27 21:43:04,942 - INFO - Evaluating all models at product level
2025-07-27 21:43:04,943 - INFO - Comparing 1 models
2025-07-27 21:43:04,944 - INFO - Evaluating model: product_80558_xgboost
2025-07-27 21:43:04,945 - INFO - Evaluating all models at store level
2025-07-27 21:43:04,946 - INFO - Comparing 1 models
2025-07-27 21:43:04,946 - INFO - Evaluating model: product_80558_xgboost
2025-07-27 21:43:04,959 - INFO - Report saved to pipeline_demo_results/evaluation_results/product_evaluation_report.md
2025-07-27 21:43:04,960 - INFO - Evaluation results saved to pipeline_demo_results/evaluation_results
2025-07-27 21:43:04,961 - INFO - Experiment log saved to pipeline_demo_results/experiment_log.json



Experiment Log Summary:
  - pipeline_demo: 19400 samples, 148 features
    Performance: RMSE 19.353560635664707
