In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install required packages
!pip install -q pytorch_lightning wandb torch torchvision pandas numpy matplotlib seaborn scikit-learn mlflow wand dagshub neuralforecast

# Set up Kaggle API
!pip install -q kaggle

Collecting pytorch_lightning
  Downloading pytorch_lightning-2.5.2-py3-none-any.whl.metadata (21 kB)
Collecting mlflow
  Downloading mlflow-3.1.4-py3-none-any.whl.metadata (29 kB)
Collecting wand
  Downloading Wand-0.6.13-py2.py3-none-any.whl.metadata (4.0 kB)
Collecting dagshub
  Downloading dagshub-0.6.2-py3-none-any.whl.metadata (12 kB)
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.8.0-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.15.0-py3-none-any.whl.metadata (5.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cud

In [3]:
# Upload your kaggle.json to Colab and run:
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Download the dataset
!kaggle competitions download -c walmart-recruiting-store-sales-forecasting
!unzip -q walmart-recruiting-store-sales-forecasting.zip

Downloading walmart-recruiting-store-sales-forecasting.zip to /content
  0% 0.00/2.70M [00:00<?, ?B/s]
100% 2.70M/2.70M [00:00<00:00, 408MB/s]


In [5]:
!unzip -q train.csv.zip
!unzip -q stores.csv.zip
!unzip -q test.csv.zip
!unzip -q features.csv.zip

unzip:  cannot find or open stores.csv.zip, stores.csv.zip.zip or stores.csv.zip.ZIP.


In [9]:
# N-BEATS Model for Full Walmart Dataset - Production Optimized
# ================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from neuralforecast.losses.pytorch import MSE
import joblib
import wandb
from itertools import product
import logging
import warnings
import os
import gc
from datetime import datetime, timedelta
from multiprocessing import cpu_count
import psutil

# Configuration
warnings.filterwarnings('ignore')
logging.getLogger().setLevel(logging.WARNING)
logging.getLogger("neuralforecast").setLevel(logging.WARNING)
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)
logging.getLogger("lightning_fabric").setLevel(logging.WARNING)

# ============================================================
# System Optimization and Memory Management
# ============================================================

def check_system_resources():
    """Check available system resources"""
    memory = psutil.virtual_memory()
    cpu_cores = cpu_count()

    print(f"Available Memory: {memory.available / (1024**3):.2f} GB")
    print(f"Total Memory: {memory.total / (1024**3):.2f} GB")
    print(f"Memory Usage: {memory.percent}%")
    print(f"CPU Cores: {cpu_cores}")

    return {
        'available_memory_gb': memory.available / (1024**3),
        'total_memory_gb': memory.total / (1024**3),
        'memory_percent': memory.percent,
        'cpu_cores': cpu_cores
    }

def optimize_memory():
    """Force garbage collection and memory optimization"""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

class MemoryEfficientProcessor:
    """Memory-efficient data processor with chunking capabilities"""

    def __init__(self, chunk_size=1000000):
        self.chunk_size = chunk_size
        self.scaler = StandardScaler()
        self.is_fitted = False

    def load_data_efficiently(self, stores_path, features_path, train_path, test_path=None):
        """Load data with memory optimization"""
        print("Loading data efficiently...")

        # Load smaller files first
        self.stores = pd.read_csv(stores_path)
        print(f"Stores data: {self.stores.shape}")

        # Load features with chunking if needed
        try:
            self.features = pd.read_csv(features_path)
            print(f"Features data: {self.features.shape}")
        except MemoryError:
            print("Loading features in chunks due to memory constraints...")
            chunks = []
            for chunk in pd.read_csv(features_path, chunksize=self.chunk_size):
                chunks.append(chunk)
            self.features = pd.concat(chunks, ignore_index=True)
            del chunks
            optimize_memory()

        # Load training data with chunking if needed
        try:
            self.train = pd.read_csv(train_path)
            print(f"Training data: {self.train.shape}")
        except MemoryError:
            print("Loading training data in chunks due to memory constraints...")
            chunks = []
            for chunk in pd.read_csv(train_path, chunksize=self.chunk_size):
                chunks.append(chunk)
            self.train = pd.concat(chunks, ignore_index=True)
            del chunks
            optimize_memory()

        # Load test data if provided
        if test_path and os.path.exists(test_path):
            try:
                self.test = pd.read_csv(test_path)
                print(f"Test data: {self.test.shape}")
            except MemoryError:
                print("Loading test data in chunks...")
                chunks = []
                for chunk in pd.read_csv(test_path, chunksize=self.chunk_size):
                    chunks.append(chunk)
                self.test = pd.concat(chunks, ignore_index=True)
                del chunks
                optimize_memory()

        return self

    def preprocess_full_dataset(self, sample_stores=None, sample_depts=None):
        """Preprocess the full dataset with optional sampling for testing"""
        print("Preprocessing full dataset...")

        df = self.train.copy()

        # Convert Date columns to datetime
        print("Converting date columns...")
        df['Date'] = pd.to_datetime(df['Date'])
        features_df = self.features.copy()
        features_df['Date'] = pd.to_datetime(features_df['Date'])

        # Optional sampling for testing/development
        if sample_stores is not None:
            print(f"Sampling {sample_stores} stores for testing...")
            unique_stores = df['Store'].unique()[:sample_stores]
            df = df[df['Store'].isin(unique_stores)]
            features_df = features_df[features_df['Store'].isin(unique_stores)]

        if sample_depts is not None:
            print(f"Sampling {sample_depts} departments per store for testing...")
            sampled_data = []
            for store in df['Store'].unique():
                store_data = df[df['Store'] == store]
                unique_depts = store_data['Dept'].unique()[:sample_depts]
                sampled_data.append(store_data[store_data['Dept'].isin(unique_depts)])
            df = pd.concat(sampled_data, ignore_index=True)

        print(f"Working with {df['Store'].nunique()} stores and {df['Dept'].nunique()} departments")
        print(f"Data shape after sampling: {df.shape}")

        # Merge with stores data
        print("Merging with stores data...")
        df = df.merge(self.stores, on='Store', how='left')

        # Merge with features data in chunks to manage memory
        print("Merging with features data...")
        if len(df) > 1000000:  # Large dataset
            print("Using chunked merge for large dataset...")
            merged_chunks = []
            chunk_size = 500000

            for i in range(0, len(df), chunk_size):
                chunk = df.iloc[i:i+chunk_size]
                merged_chunk = chunk.merge(features_df, on=['Store', 'Date'], how='left')
                merged_chunks.append(merged_chunk)

            df = pd.concat(merged_chunks, ignore_index=True)
            del merged_chunks
            optimize_memory()
        else:
            df = df.merge(features_df, on=['Store', 'Date'], how='left')

        # Fill missing values
        print("Filling missing values...")
        df = df.fillna(0)

        # Sort data
        print("Sorting data...")
        df = df.sort_values(['Store', 'Dept', 'Date'])

        # Create unique_id for N-BEATS
        print("Creating unique identifiers...")
        df['unique_id'] = df['Store'].astype(str) + '_' + df['Dept'].astype(str)

        # Rename columns for N-BEATS format
        df = df.rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})

        # Select relevant columns
        columns_to_keep = ['unique_id', 'ds', 'y']
        if 'IsHoliday' in df.columns:
            columns_to_keep.append('IsHoliday')
        if 'Temperature' in df.columns:
            columns_to_keep.append('Temperature')
        if 'Fuel_Price' in df.columns:
            columns_to_keep.append('Fuel_Price')
        if 'CPI' in df.columns:
            columns_to_keep.append('CPI')
        if 'Unemployment' in df.columns:
            columns_to_keep.append('Unemployment')

        df = df[columns_to_keep]

        print(f"Final processed data shape: {df.shape}")
        print(f"Unique time series: {df['unique_id'].nunique()}")

        self.processed_data = df
        optimize_memory()
        return df

    def split_data_efficiently(self, test_size=0.2):
        """Split data efficiently for large datasets"""
        print("Splitting data efficiently...")

        df = self.processed_data.copy()
        df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)

        # Calculate split point
        unique_dates = sorted(df['ds'].unique())
        split_idx = int(len(unique_dates) * (1 - test_size))
        split_date = unique_dates[split_idx]

        print(f"Split date: {split_date}")
        print(f"Training dates: {split_idx}, Validation dates: {len(unique_dates) - split_idx}")

        # Split data
        train_df = df[df['ds'] < split_date].copy()
        valid_df = df[df['ds'] >= split_date].copy()

        print(f"Training data: {train_df.shape}")
        print(f"Validation data: {valid_df.shape}")
        print(f"Training unique IDs: {train_df['unique_id'].nunique()}")
        print(f"Validation unique IDs: {valid_df['unique_id'].nunique()}")

        # Separate features and target
        X_train = train_df.drop(columns=['y'])
        y_train = train_df['y']
        X_valid = valid_df.drop(columns=['y'])
        y_valid = valid_df['y']

        optimize_memory()
        return X_train, y_train, X_valid, y_valid

# ============================================================
# Scalable N-BEATS Implementation
# ============================================================

class ScalableNBEATSWrapper:
    """Scalable N-BEATS wrapper optimized for large datasets"""

    def __init__(self, **nbeats_params):
        self.nbeats_params = nbeats_params
        self.model = None
        self.nf = None
        self.train_data = None

    def fit(self, X, y=None):
        """Fit N-BEATS model with memory optimization"""
        print("Fitting scalable N-BEATS model...")

        # Prepare data
        if y is not None:
            data = X.copy()
            data['y'] = y.values if hasattr(y, 'values') else y
        else:
            data = X.copy()

        self.train_data = data.sort_values(['unique_id', 'ds']).reset_index(drop=True)

        # Optimize parameters based on dataset size
        safe_params = self.nbeats_params.copy()

        # Remove problematic parameters
        problematic_params = [
            'val_size', 'early_stop_patience_steps', 'enable_progress_bar',
            'enable_model_summary', 'shared_weights', 'activation'
        ]
        for param in problematic_params:
            safe_params.pop(param, None)

        # Adaptive parameter setting based on dataset size
        n_series = self.train_data['unique_id'].nunique()
        data_size = len(self.train_data)

        print(f"Dataset size: {data_size:,} observations, {n_series:,} time series")

        # Adjust parameters based on size
        if n_series > 1000:  # Large dataset
            safe_params.setdefault('max_steps', 50)
            safe_params.setdefault('batch_size', 64)
            safe_params.setdefault('h', 10)
            safe_params.setdefault('input_size', 20)
        elif n_series > 100:  # Medium dataset
            safe_params.setdefault('max_steps', 100)
            safe_params.setdefault('batch_size', 32)
            safe_params.setdefault('h', 15)
            safe_params.setdefault('input_size', 30)
        else:  # Small dataset
            safe_params.setdefault('max_steps', 200)
            safe_params.setdefault('batch_size', 16)
            safe_params.setdefault('h', 20)
            safe_params.setdefault('input_size', 40)

        # Essential parameters
        safe_params.setdefault('learning_rate', 1e-3)
        safe_params.setdefault('random_seed', 42)

        print(f"Using parameters: {safe_params}")

        # Create model
        self.model = NBEATS(**safe_params)
        self.nf = NeuralForecast(models=[self.model], freq='W')

        # Fit with progress tracking
        try:
            print("Training model...")
            start_time = datetime.now()
            self.nf.fit(self.train_data)
            training_time = datetime.now() - start_time
            print(f"Training completed in {training_time}")

        except Exception as e:
            print(f"Training failed: {e}")
            # Fallback with minimal parameters
            minimal_params = {
                'max_steps': 20,
                'h': 5,
                'input_size': 10,
                'batch_size': 32,
                'learning_rate': 1e-3,
                'random_seed': 42
            }
            print("Trying with minimal parameters...")
            self.model = NBEATS(**minimal_params)
            self.nf = NeuralForecast(models=[self.model], freq='W')
            self.nf.fit(self.train_data)

        optimize_memory()
        return self

    def predict(self, X):
        """Make predictions with error handling"""
        if self.nf is None:
            raise ValueError("Model not fitted yet!")

        validation_data = X.copy().sort_values(['unique_id', 'ds']).reset_index(drop=True)

        # Calculate horizon
        n_series = validation_data['unique_id'].nunique()
        horizon = len(validation_data) // n_series if n_series > 0 else 10

        print(f"Making predictions for {n_series:,} series with horizon {horizon}")

        try:
            predictions = self.nf.predict(h=horizon)
            pred_values = predictions['NBEATS'].values

            # Handle shape mismatch
            if len(pred_values) != len(validation_data):
                print(f"Adjusting predictions: got {len(pred_values)}, need {len(validation_data)}")
                if len(pred_values) < len(validation_data):
                    repeat_factor = len(validation_data) // len(pred_values) + 1
                    pred_values = np.tile(pred_values, repeat_factor)[:len(validation_data)]
                else:
                    pred_values = pred_values[:len(validation_data)]

            return pred_values

        except Exception as e:
            print(f"Prediction error: {e}")
            # Fallback prediction
            mean_pred = self.train_data['y'].mean()
            return np.full(len(validation_data), mean_pred)

# ============================================================
# Evaluation and Monitoring
# ============================================================

def wmae(y_true, y_pred, is_holiday):
    """Weighted Mean Absolute Error"""
    weights = np.where(is_holiday, 5, 1)
    mae = np.abs(y_true - y_pred)
    return np.sum(weights * mae) / np.sum(weights)

def comprehensive_evaluation(y_true, y_pred, is_holiday=None, unique_ids=None):
    """Comprehensive model evaluation"""
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    metrics = {
        'MAE': mean_absolute_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'R2': r2_score(y_true, y_pred),
        'MAPE': np.mean(np.abs((y_true - y_pred) / np.where(y_true == 0, 1, y_true))) * 100
    }

    if is_holiday is not None:
        metrics['WMAE'] = wmae(y_true, y_pred, is_holiday)

    # Per-series metrics if unique_ids provided
    if unique_ids is not None:
        df_eval = pd.DataFrame({
            'y_true': y_true,
            'y_pred': y_pred,
            'unique_id': unique_ids
        })

        series_metrics = df_eval.groupby('unique_id').apply(
            lambda x: pd.Series({
                'MAE': mean_absolute_error(x['y_true'], x['y_pred']),
                'RMSE': np.sqrt(mean_squared_error(x['y_true'], x['y_pred']))
            })
        )

        metrics['Series_MAE_mean'] = series_metrics['MAE'].mean()
        metrics['Series_MAE_std'] = series_metrics['MAE'].std()
        metrics['Series_RMSE_mean'] = series_metrics['RMSE'].mean()
        metrics['Series_RMSE_std'] = series_metrics['RMSE'].std()

    return metrics

# ============================================================
# Production Pipeline
# ============================================================

def production_pipeline(stores_path='stores.csv',
                       features_path='features.csv',
                       train_path='train.csv',
                       test_path='test.csv',
                       sample_stores=None,
                       sample_depts=None,
                       use_wandb=True):
    """Production pipeline for full dataset"""

    # Check system resources
    resources = check_system_resources()

    # Initialize tracking
    if use_wandb:
        wandb.init(
            project="Walmart-Sales-Forecasting-Production",
            name=f"nbeats_full_dataset_{datetime.now().strftime('%Y%m%d_%H%M')}"
        )
        wandb.config.update(resources)

    # Load and preprocess data
    print("=== LOADING DATA ===")
    processor = MemoryEfficientProcessor()

    try:
        processor.load_data_efficiently(stores_path, features_path, train_path, test_path)

        # Preprocess data
        print("\n=== PREPROCESSING DATA ===")
        df = processor.preprocess_full_dataset(sample_stores=sample_stores, sample_depts=sample_depts)

        # Split data
        print("\n=== SPLITTING DATA ===")
        X_train, y_train, X_valid, y_valid = processor.split_data_efficiently()

        if use_wandb:
            wandb.config.update({
                'n_stores': df['unique_id'].str.split('_').str[0].nunique(),
                'n_departments': df['unique_id'].str.split('_').str[1].nunique(),
                'n_series': df['unique_id'].nunique(),
                'train_size': len(X_train),
                'valid_size': len(X_valid)
            })

    except Exception as e:
        print(f"Data loading/preprocessing failed: {e}")
        if use_wandb:
            wandb.finish()
        return None, None

    # Train model
    print("\n=== TRAINING MODEL ===")
    try:
        # Adaptive parameters based on dataset size
        n_series = X_train['unique_id'].nunique()

        if n_series > 1000:
            model_params = {
                'max_steps': 50,
                'h': 8,
                'input_size': 16,
                'batch_size': 128,
                'learning_rate': 1e-3,
                'random_seed': 42
            }
        elif n_series > 100:
            model_params = {
                'max_steps': 100,
                'h': 12,
                'input_size': 24,
                'batch_size': 64,
                'learning_rate': 1e-3,
                'random_seed': 42
            }
        else:
            model_params = {
                'max_steps': 200,
                'h': 15,
                'input_size': 30,
                'batch_size': 32,
                'learning_rate': 1e-3,
                'random_seed': 42
            }

        print(f"Using model parameters: {model_params}")

        # Create and train model
        model = ScalableNBEATSWrapper(**model_params)

        # Prepare training data
        train_data = X_train.copy()
        train_data['y'] = y_train

        # Train model
        start_time = datetime.now()
        model.fit(train_data)
        training_time = datetime.now() - start_time

        print(f"Training completed in {training_time}")

        # Make predictions
        print("\n=== MAKING PREDICTIONS ===")
        predictions = model.predict(X_valid)

        # Evaluate
        print("\n=== EVALUATING MODEL ===")
        is_holiday = X_valid.get('IsHoliday', np.zeros(len(y_valid)))
        unique_ids = X_valid['unique_id']

        metrics = comprehensive_evaluation(
            y_valid.values if hasattr(y_valid, 'values') else y_valid,
            predictions,
            is_holiday,
            unique_ids
        )

        print("Model Performance:")
        for metric, value in metrics.items():
            print(f"  {metric}: {value:.4f}")

        # Log results
        if use_wandb:
            wandb.config.update(model_params)
            wandb.log(metrics)
            wandb.log({'training_time_seconds': training_time.total_seconds()})

        # Save model
        model_filename = f'nbeats_production_model_{datetime.now().strftime("%Y%m%d_%H%M")}.pkl'
        joblib.dump(model, model_filename)
        print(f"Model saved as: {model_filename}")

        if use_wandb:
            wandb.finish()

        return model, metrics

    except Exception as e:
        print(f"Training failed: {e}")
        import traceback
        traceback.print_exc()
        if use_wandb:
            wandb.finish()
        return None, None

# ============================================================
# Quick Testing Function
# ============================================================

def quick_test_production(sample_stores=5, sample_depts=3):
    """Quick test with small sample"""
    print("=== QUICK PRODUCTION TEST ===")
    print(f"Testing with {sample_stores} stores and {sample_depts} departments per store")

    return production_pipeline(
        sample_stores=sample_stores,
        sample_depts=sample_depts,
        use_wandb=False
    )

# ============================================================
# Notebook-Friendly Execution Functions
# ============================================================

def run_quick_test(sample_stores=5, sample_depts=3, use_wandb=False):
    """Run quick test - notebook friendly"""
    print("=== RUNNING QUICK TEST ===")
    print(f"Testing with {sample_stores} stores and {sample_depts} departments per store")

    model, metrics = production_pipeline(
        sample_stores=sample_stores,
        sample_depts=sample_depts,
        use_wandb=use_wandb
    )

    if model is not None:
        print("\n=== TEST COMPLETED SUCCESSFULLY ===")
        print("Key metrics:")
        for key, value in metrics.items():
            if not key.startswith('Series_'):
                print(f"  {key}: {value:.4f}")
        return model, metrics
    else:
        print("\n=== TEST FAILED ===")
        return None, None

def run_full_production(use_wandb=True, sample_stores=None, sample_depts=None):
    """Run full production pipeline - notebook friendly"""
    print("=== RUNNING FULL PRODUCTION PIPELINE ===")

    model, metrics = production_pipeline(
        sample_stores=sample_stores,
        sample_depts=sample_depts,
        use_wandb=use_wandb
    )

    if model is not None:
        print("\n=== PRODUCTION RUN COMPLETED SUCCESSFULLY ===")
        print("Key metrics:")
        for key, value in metrics.items():
            if not key.startswith('Series_'):
                print(f"  {key}: {value:.4f}")
        return model, metrics
    else:
        print("\n=== PRODUCTION RUN FAILED ===")
        return None, None

# ============================================================
# Main Execution (Notebook Safe)
# ============================================================

def main():
    """Main function for notebook execution"""
    print("N-BEATS Walmart Sales Forecasting - Ready!")
    print("\nAvailable functions:")
    print("1. run_quick_test(sample_stores=5, sample_depts=3) - Quick test with sample data")
    print("2. run_full_production() - Full dataset production run")
    print("3. production_pipeline() - Direct access to pipeline")
    print("\nExample usage:")
    print("  model, metrics = run_quick_test(sample_stores=10, sample_depts=5)")
    print("  model, metrics = run_full_production()")

# Auto-run main if not in interactive environment
if __name__ == "__main__":
    try:
        # Check if we're in an interactive environment
        get_ipython()
        # If we are, just show the help
        main()
    except NameError:
        # If we're not in interactive environment, run command line interface
        import sys
        if len(sys.argv) == 1:
            # No arguments, run quick test
            print("No arguments provided, running quick test...")
            run_quick_test()
        else:
            print("For command line usage, use the functions directly in Python:")
            print("python -c \"from script_name import run_quick_test; run_quick_test()\"")
            main()
            # Quick test with 10 stores and 5 departments per store
model, metrics = run_quick_test(sample_stores=10, sample_depts=5)

N-BEATS Walmart Sales Forecasting - Ready!

Available functions:
1. run_quick_test(sample_stores=5, sample_depts=3) - Quick test with sample data
2. run_full_production() - Full dataset production run
3. production_pipeline() - Direct access to pipeline

Example usage:
  model, metrics = run_quick_test(sample_stores=10, sample_depts=5)
  model, metrics = run_full_production()
=== RUNNING QUICK TEST ===
Testing with 10 stores and 5 departments per store
Available Memory: 9.69 GB
Total Memory: 12.67 GB
Memory Usage: 23.5%
CPU Cores: 2
=== LOADING DATA ===
Loading data efficiently...
Stores data: (45, 3)
Features data: (8190, 12)
Training data: (421570, 5)
Test data: (115064, 4)

=== PREPROCESSING DATA ===
Preprocessing full dataset...
Converting date columns...
Sampling 10 stores for testing...
Sampling 5 departments per store for testing...
Working with 10 stores and 5 departments
Data shape after sampling: (7150, 5)
Merging with stores data...
Merging with features data...
Filling miss

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Training completed in 0:01:10.496068
Training completed in 0:01:11.463845

=== MAKING PREDICTIONS ===
Making predictions for 50 series with horizon 29
Prediction error: DataLoader.__init__() got an unexpected keyword argument 'h'

=== EVALUATING MODEL ===
Model Performance:
  MAE: 16833.4573
  RMSE: 22410.8934
  R2: -0.0025
  MAPE: 114.1197
  WMAE: 16833.4573
  Series_MAE_mean: 16833.4573
  Series_MAE_std: 13873.7252
  Series_RMSE_mean: 17803.0762
  Series_RMSE_std: 13750.6447
Model saved as: nbeats_production_model_20250731_2032.pkl

=== TEST COMPLETED SUCCESSFULLY ===
Key metrics:
  MAE: 16833.4573
  RMSE: 22410.8934
  R2: -0.0025
  MAPE: 114.1197
  WMAE: 16833.4573
