In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import xgboost as xgb
from sklearn.model_selection import TimeSeriesSplit
import warnings
warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('ggplot')
sns.set_palette("deep")

def load_data(file_path):
    """Load and prepare the time series data"""
    df = pd.read_csv(file_path)
    
    # Convert time column to datetime
    df['time'] = pd.to_datetime(df['time'])
    
    # Set time as index
    df.set_index('time', inplace=True)
    
    return df

def engineer_features(df):
    """Create additional features for modeling"""
    df_copy = df.copy()
    
    # Create lag features
    for i in range(1, 13):
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_lag_{i}'] = df_copy['cpi_mom'].shift(i)
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_lag_{i}'] = df_copy['cpi_yoy'].shift(i)
    
    # Create rolling window features
    for window in [3, 6, 12]:
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_rolling_mean_{window}'] = df_copy['cpi_mom'].rolling(window=window).mean()
            df_copy[f'cpi_mom_rolling_std_{window}'] = df_copy['cpi_mom'].rolling(window=window).std()
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_rolling_mean_{window}'] = df_copy['cpi_yoy'].rolling(window=window).mean()
            df_copy[f'cpi_yoy_rolling_std_{window}'] = df_copy['cpi_yoy'].rolling(window=window).std()
    
    # Create economic indicator lag features
    for i in range(1, 4):
        if 'oil_price' in df_copy.columns:
            df_copy[f'oil_price_lag_{i}'] = df_copy['oil_price'].shift(i)
        if 'gold_price' in df_copy.columns:
            df_copy[f'gold_price_lag_{i}'] = df_copy['gold_price'].shift(i)
        if 'interest_rate' in df_copy.columns:
            df_copy[f'interest_rate_lag_{i}'] = df_copy['interest_rate'].shift(i)
    
    # Add month and year as cyclical features
    if 'month' in df_copy.columns:
        df_copy['month_sin'] = np.sin(2 * np.pi * df_copy['month']/12)
        df_copy['month_cos'] = np.cos(2 * np.pi * df_copy['month']/12)
    
    # Create interaction features
    if all(col in df_copy.columns for col in ['oil_price', 'gold_price']):
        df_copy['oil_gold_ratio'] = df_copy['oil_price'] / df_copy['gold_price']
    
    # Drop rows with NaN values (due to lag features)
    df_clean = df_copy.dropna()
    
    return df_clean

def calculate_metrics(actual, predicted):
    """Calculate evaluation metrics"""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    
    return rmse, mae, mape, r2

def prepare_data_for_modeling(df, target_col, test_size=0.2, validation_size=0.1):
    """
    Split data into features and target, and into training, validation and testing sets
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with features and target
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
    validation_size : float
        Proportion of remaining data to use for validation
        
    Returns:
    --------
    tuple
        X_train, X_val, X_test, y_train, y_val, y_test, scaler
    """
    # Define features and target
    X = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1)
    y = df[target_col]
    
    # Split data into training and testing sets (time-based split)
    split_test_idx = int(len(df) * (1 - test_size))
    X_temp, X_test = X.iloc[:split_test_idx], X.iloc[split_test_idx:]
    y_temp, y_test = y.iloc[:split_test_idx], y.iloc[split_test_idx:]
    
    # Split remaining data into training and validation
    split_val_idx = int(len(X_temp) * (1 - validation_size))
    X_train, X_val = X_temp.iloc[:split_val_idx], X_temp.iloc[split_val_idx:]
    y_train, y_val = y_temp.iloc[:split_val_idx], y_temp.iloc[split_val_idx:]
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)
    
    return X_train, X_val, X_test, y_train, y_val, y_test, X_train_scaled, X_val_scaled, X_test_scaled, scaler

def train_base_models(X_train, X_val, y_train, y_val, X_train_scaled, X_val_scaled):
    """
    Train base models for stacking
    
    Parameters:
    -----------
    X_train, X_val : pd.DataFrame
        Training and validation features
    y_train, y_val : pd.Series
        Training and validation targets
    X_train_scaled, X_val_scaled : np.array
        Scaled training and validation features
        
    Returns:
    --------
    dict
        Dictionary of trained models and their validation predictions
    """
    # Define base models
    base_models = {
        'Linear Regression': LinearRegression(),
        'Ridge': Ridge(alpha=0.1),
        'Lasso': Lasso(alpha=0.01),
        'ElasticNet': ElasticNet(alpha=0.01, l1_ratio=0.5),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
        'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
        'XGBoost': xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42),
        'SVR': SVR(kernel='rbf', C=1.0, epsilon=0.1)
    }
    
    # Train base models and get validation predictions
    base_model_preds = {}
    
    for name, model in base_models.items():
        print(f"Training {name}...")
        
        # Use scaled data for linear models and SVR, unscaled for tree-based models
        if name in ['Linear Regression', 'Ridge', 'Lasso', 'ElasticNet', 'SVR']:
            model.fit(X_train_scaled, y_train)
            val_pred = model.predict(X_val_scaled)
        else:
            model.fit(X_train, y_train)
            val_pred = model.predict(X_val)
        
        # Calculate validation metrics
        val_rmse, val_mae, val_mape, val_r2 = calculate_metrics(y_val, val_pred)
        print(f"{name} - Validation RMSE: {val_rmse:.4f}, MAE: {val_mae:.4f}, MAPE: {val_mape:.2f}%, R²: {val_r2:.4f}")
        
        # Store model and validation predictions
        base_model_preds[name] = {
            'model': model,
            'val_pred': val_pred,
            'val_rmse': val_rmse,
            'val_mae': val_mae,
            'val_mape': val_mape,
            'val_r2': val_r2
        }
    
    return base_model_preds

def train_meta_model(base_model_preds, X_val, y_val, X_test, meta_model_type='Ridge'):
    """
    Train meta-model for stacking
    
    Parameters:
    -----------
    base_model_preds : dict
        Dictionary of base models and their validation predictions
    X_val, y_val : pd.DataFrame, pd.Series
        Validation features and target
    X_test : pd.DataFrame
        Test features
    meta_model_type : str
        Type of meta-model to use
        
    Returns:
    --------
    tuple
        Meta-model, test predictions, and base model test predictions
    """
    # Create meta-features from base model predictions
    meta_features = pd.DataFrame({
        name: preds['val_pred'] for name, preds in base_model_preds.items()
    }, index=X_val.index)
    
    # Select meta-model
    if meta_model_type == 'Ridge':
        meta_model = Ridge(alpha=0.1)
    elif meta_model_type == 'Lasso':
        meta_model = Lasso(alpha=0.01)
    elif meta_model_type == 'ElasticNet':
        meta_model = ElasticNet(alpha=0.01, l1_ratio=0.5)
    elif meta_model_type == 'Linear':
        meta_model = LinearRegression()
    else:
        meta_model = Ridge(alpha=0.1)  # Default
    
    # Train meta-model
    print(f"\nTraining {meta_model_type} meta-model...")
    meta_model.fit(meta_features, y_val)
    
    # Get base model predictions on test set
    base_test_preds = {}
    for name, model_dict in base_model_preds.items():
        model = model_dict['model']
        
        # Use scaled data for linear models and SVR, unscaled for tree-based models
        if name in ['Linear Regression', 'Ridge', 'Lasso', 'ElasticNet', 'SVR']:
            test_pred = model.predict(X_test)
        else:
            test_pred = model.predict(X_test)
        
        base_test_preds[name] = test_pred
    
    # Create meta-features for test set
    meta_features_test = pd.DataFrame(base_test_preds, index=X_test.index)
    
    # Make predictions with meta-model
    test_pred = meta_model.predict(meta_features_test)
    
    # Get meta-model coefficients
    if hasattr(meta_model, 'coef_'):
        print("\nMeta-model coefficients:")
        for name, coef in zip(base_test_preds.keys(), meta_model.coef_):
            print(f"{name}: {coef:.4f}")
    
    return meta_model, test_pred, base_test_preds, meta_features_test

def run_stacking(df, target_col, test_size=0.2, validation_size=0.1, meta_model_type='Ridge'):
    """
    Run stacking ensemble for time series forecasting
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with engineered features
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
    validation_size : float
        Proportion of remaining data to use for validation
    meta_model_type : str
        Type of meta-model to use
        
    Returns:
    --------
    tuple
        Meta-model, predictions, metrics
    """
    print(f"Running Stacking Ensemble for {target_col}...")
    
    # Prepare data
    X_train, X_val, X_test, y_train, y_val, y_test, X_train_scaled, X_val_scaled, X_test_scaled, scaler = prepare_data_for_modeling(
        df, target_col, test_size, validation_size
    )
    
    print(f"Training data: {X_train.shape}, Validation data: {X_val.shape}, Test data: {X_test.shape}")
    
    # Train base models
    base_model_preds = train_base_models(X_train, X_val, y_train, y_val, X_train_scaled, X_val_scaled)
    
    # Train meta-model
    meta_model, test_pred, base_test_preds, meta_features_test = train_meta_model(
        base_model_preds, X_val, y_val, X_test, meta_model_type
    )
    
    # Calculate metrics for stacking ensemble
    stack_rmse, stack_mae, stack_mape, stack_r2 = calculate_metrics(y_test, test_pred)
    print(f"\nStacking Ensemble - Test RMSE: {stack_rmse:.4f}, MAE: {stack_mae:.4f}, MAPE: {stack_mape:.2f}%, R²: {stack_r2:.4f}")
    
    # Calculate metrics for individual base models on test set
    print("\nBase Model Test Performance:")
    base_metrics = {}
    for name, pred in base_test_preds.items():
        rmse, mae, mape, r2 = calculate_metrics(y_test, pred)
        print(f"{name} - Test RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}")
        base_metrics[name] = {'rmse': rmse, 'mae': mae, 'mape': mape, 'r2': r2}
    
    # Plot actual vs predicted for stacking and base models
    plt.figure(figsize=(14, 7))
    plt.plot(y_test.index, y_test, label='Actual', color='black', linewidth=2)
    plt.plot(y_test.index, test_pred, label='Stacking Ensemble', color='red', linewidth=2)
    
    # Plot top 3 base models
    top_models = sorted(base_metrics.items(), key=lambda x: x[1]['rmse'])[:3]
    for name, metrics in top_models:
        plt.plot(y_test.index, base_test_preds[name], label=f'{name}', linestyle='--', alpha=0.7)
    
    plt.title(f'Stacking Ensemble: Actual vs Predicted {target_col}')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/stacking_predictions_{target_col}.png')
    plt.close()
    
    # Plot error distribution
    plt.figure(figsize=(10, 6))
    errors = y_test - test_pred
    sns.histplot(errors, kde=True)
    plt.title(f'Stacking Ensemble: Error Distribution for {target_col}')
    plt.xlabel('Error')
    plt.savefig(f'plots/stacking_error_distribution_{target_col}.png')
    plt.close()
    
    # Create a DataFrame with predictions
    predictions = pd.DataFrame({
        'Actual': y_test,
        'Stacking': test_pred,
    })
    
    # Add top 3 base models
    for name, _ in top_models:
        predictions[name] = base_test_preds[name]
    
    # Add errors
    predictions['Error'] = predictions['Actual'] - predictions['Stacking']
    
    # Return results
    metrics = {
        'rmse': stack_rmse,
        'mae': stack_mae,
        'mape': stack_mape,
        'r2': stack_r2,
        'base_metrics': base_metrics
    }
    
    return meta_model, predictions, metrics, base_model_preds, meta_features_test

def forecast_future_stacking(meta_model, base_models, df, target_col, forecast_horizon=24, scaler=None):
    """
    Generate future forecasts using the stacking ensemble
    
    Parameters:
    -----------
    meta_model : sklearn model
        Trained meta-model
    base_models : dict
        Dictionary of trained base models
    df : pd.DataFrame
        Input dataframe with features
    target_col : str
        Name of the target column
    forecast_horizon : int
        Number of periods to forecast
    scaler : StandardScaler
        Scaler used for features
        
    Returns:
    --------
    pd.Series
        Forecasted values
    """
    print(f"Generating {forecast_horizon} period forecast...")
    
    # Create a copy of the dataframe
    df_forecast = df.copy()
    
    # Get the last date in the dataframe
    last_date = df_forecast.index[-1]
    
    # Create a list to store forecasts
    forecasts = []
    
    # Generate forecasts recursively
    for i in range(forecast_horizon):
        # Get the features for the next period
        if i == 0:
            # For the first forecast, use the last row of the dataframe
            X_next = df_forecast.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df_forecast.columns], axis=1).iloc[-1:].copy()
        else:
            # For a more accurate implementation, we would need to update all features
            # based on the new forecast, but this is a simplified version
            # Here we just use the last known features
            X_next = df_forecast.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df_forecast.columns], axis=1).iloc[-1:].copy()
        
        # Get base model predictions
        base_preds = {}
        for name, model_dict in base_models.items():
            model = model_dict['model']
            
            # Use scaled data for linear models and SVR, unscaled for tree-based models
            if name in ['Linear Regression', 'Ridge', 'Lasso', 'ElasticNet', 'SVR']:
                if scaler:
                    X_next_scaled = scaler.transform(X_next)
                    pred = model.predict(X_next_scaled)[0]
                else:
                    pred = model.predict(X_next)[0]
            else:
                pred = model.predict(X_next)[0]
            
            base_preds[name] = pred
        
        # Create meta-features
        meta_features = pd.DataFrame([base_preds])
        
        # Make prediction with meta-model
        forecast = meta_model.predict(meta_features)[0]
        
        # Store forecast
        forecasts.append(forecast)
        
        # Create next date
        next_date = last_date + pd.DateOffset(months=i+1)
    
    # Create a Series with the forecasts
    future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=forecast_horizon, freq='MS')
    forecast_series = pd.Series(forecasts, index=future_dates)
    
    # Plot historical data with forecasts
    plt.figure(figsize=(14, 7))
    plt.plot(df[target_col].index, df[target_col], label='Historical Data')
    plt.plot(forecast_series.index, forecast_series, label='Stacking Forecast', color='red', linestyle='--')
    plt.title(f'Stacking Ensemble: {target_col} Forecast')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/stacking_future_forecast_{target_col}.png')
    plt.close()
    
    return forecast_series

def main():
    # Create plots directory if it doesn't exist
    import os
    if not os.path.exists('plots'):
        os.makedirs('plots')
    
    # Load data
    file_path = 'data/analyzed_time_series.csv'
    df = load_data(file_path)
    
    # Engineer features
    df_engineered = engineer_features(df)
    
    # Run stacking for CPI Year-over-Year
    target_col = 'cpi_yoy'
    meta_model_yoy, predictions_yoy, metrics_yoy, base_models_yoy, meta_features_test_yoy = run_stacking(
        df_engineered, target_col, test_size=0.2, validation_size=0.1, meta_model_type='Ridge'
    )
    
    # Generate future forecasts for YoY
    forecast_yoy = forecast_future_stacking(
        meta_model_yoy, base_models_yoy, df_engineered, target_col, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_yoy)
    
    # Run stacking for CPI Month-over-Month
    target_col = 'cpi_mom'
    meta_model_mom, predictions_mom, metrics_mom, base_models_mom, meta_features_test_mom = run_stacking(
        df_engineered, target_col, test_size=0.2, validation_size=0.1, meta_model_type='Ridge'
    )
    
    # Generate future forecasts for MoM
    forecast_mom = forecast_future_stacking(
        meta_model_mom, base_models_mom, df_engineered, target_col, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_mom)
    
    # Compare with other models (if available)
    try:
        # Try to load metrics from other models
        all_metrics = pd.read_csv('plots/all_models_comparison_metrics.csv')
        
        # Create stacking metrics dataframe
        stacking_metrics = pd.DataFrame([
            {
                'Model': 'Stacking', 'Target': 'CPI MoM',
                'Train_RMSE': None, 'Train_MAE': None, 
                'Train_MAPE': None, 'Train_R2': None,
                'Test_RMSE': metrics_mom['rmse'], 'Test_MAE': metrics_mom['mae'], 
                'Test_MAPE': metrics_mom['mape'], 'Test_R2': metrics_mom['r2']
            },
            {
                'Model': 'Stacking', 'Target': 'CPI YoY',
                'Train_RMSE': None, 'Train_MAE': None, 
                'Train_MAPE': None, 'Train_R2': None,
                'Test_RMSE': metrics_yoy['rmse'], 'Test_MAE': metrics_yoy['mae'], 
                'Test_MAPE': metrics_yoy['mape'], 'Test_R2': metrics_yoy['r2']
            }
        ])
        
        # Combine metrics
        combined_metrics = pd.concat([all_metrics, stacking_metrics])
        combined_metrics.to_csv('plots/final_model_comparison_metrics.csv', index=False)
        print("\nComparison with all models saved to plots/final_model_comparison_metrics.csv")
        
        # Create comparison plots
        for target, target_name in [('CPI MoM', 'cpi_mom'), ('CPI YoY', 'cpi_yoy')]:
            # Filter metrics for the target
            target_metrics = combined_metrics[combined_metrics['Target'] == target]
            
            # Compare test metrics
            metrics_to_compare = ['Test_RMSE', 'Test_MAE', 'Test_MAPE']
            
            plt.figure(figsize=(16, 8))
            
            # Create grouped bar chart
            x = np.arange(len(metrics_to_compare))
            width = 0.1
            
            # Plot bars for each model
            models = target_metrics['Model'].unique()
            for i, model_name in enumerate(models):
                model_data = target_metrics[target_metrics['Model'] == model_name]
                if not model_data.empty and not model_data[metrics_to_compare].isnull().any().any():
                    values = [model_data[metric].values[0] for metric in metrics_to_compare]
                    plt.bar(x + (i-len(models)/2+0.5)*width, values, width, label=model_name)
            
            plt.xlabel('Metric')
            plt.ylabel('Value')
            plt.title(f'All Models Comparison for {target}')
            plt.xticks(x, metrics_to_compare)
            plt.legend()
            plt.tight_layout()
            plt.savefig(f'plots/final_model_comparison_{target_name}.png')
            plt.close()
            
    except Exception as e:
        print(f"Could not compare with other models: {e}")
        # Save metrics to CSV
        metrics_data = [
            {
                'Model': 'Stacking', 'Target': 'CPI MoM',
                'Test_RMSE': metrics_mom['rmse'], 'Test_MAE': metrics_mom['mae'], 
                'Test_MAPE': metrics_mom['mape'], 'Test_R2': metrics_mom['r2']
            },
            {
                'Model': 'Stacking', 'Target': 'CPI YoY',
                'Test_RMSE': metrics_yoy['rmse'], 'Test_MAE': metrics_yoy['mae'], 
                'Test_MAPE': metrics_yoy['mape'], 'Test_R2': metrics_yoy['r2']
            }
        ]
        
        metrics_df = pd.DataFrame(metrics_data)
        metrics_df.to_csv('plots/stacking_metrics.csv', index=False)
        print("\nMetrics saved to plots/stacking_metrics.csv")

if __name__ == "__main__":
    main()

Running Stacking Ensemble for cpi_yoy...
Training data: (242, 63), Validation data: (27, 63), Test data: (68, 63)
Training Linear Regression...
Linear Regression - Validation RMSE: 0.0000, MAE: 0.0000, MAPE: 0.00%, R²: 1.0000
Training Ridge...
Ridge - Validation RMSE: 0.0532, MAE: 0.0498, MAPE: 0.05%, R²: 0.9951
Training Lasso...
Lasso - Validation RMSE: 0.0522, MAE: 0.0407, MAPE: 0.04%, R²: 0.9953
Training ElasticNet...
ElasticNet - Validation RMSE: 0.1017, MAE: 0.0882, MAPE: 0.09%, R²: 0.9821
Training Random Forest...
Random Forest - Validation RMSE: 0.0539, MAE: 0.0461, MAPE: 0.04%, R²: 0.9950
Training Gradient Boosting...
Gradient Boosting - Validation RMSE: 0.0556, MAE: 0.0434, MAPE: 0.04%, R²: 0.9946
Training XGBoost...
XGBoost - Validation RMSE: 0.0809, MAE: 0.0613, MAPE: 0.06%, R²: 0.9886
Training SVR...
SVR - Validation RMSE: 0.4099, MAE: 0.3248, MAPE: 0.31%, R²: 0.7086

Training Ridge meta-model...

Meta-model coefficients:
Linear Regression: 0.1994
Ridge: 0.1892
Lasso: 0.087

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import xgboost as xgb
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
import warnings
import logging
import os
warnings.filterwarnings('ignore')

# Configure logging
if not os.path.exists('logs'):
    os.makedirs('logs')
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('logs/hybrid_model_log.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

def load_data(file_path):
    """Load and prepare the time series data"""
    df = pd.read_csv(file_path)
    
    # Convert time column to datetime
    df['time'] = pd.to_datetime(df['time'])
    
    # Set time as index
    df.set_index('time', inplace=True)
    
    return df

def engineer_features(df):
    """Create additional features for modeling"""
    df_copy = df.copy()
    
    # Create lag features
    for i in range(1, 13):
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_lag_{i}'] = df_copy['cpi_mom'].shift(i)
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_lag_{i}'] = df_copy['cpi_yoy'].shift(i)
    
    # Create rolling window features
    for window in [3, 6, 12]:
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_rolling_mean_{window}'] = df_copy['cpi_mom'].rolling(window=window).mean()
            df_copy[f'cpi_mom_rolling_std_{window}'] = df_copy['cpi_mom'].rolling(window=window).std()
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_rolling_mean_{window}'] = df_copy['cpi_yoy'].rolling(window=window).mean()
            df_copy[f'cpi_yoy_rolling_std_{window}'] = df_copy['cpi_yoy'].rolling(window=window).std()
    
    # Create economic indicator lag features
    for i in range(1, 4):
        if 'oil_price' in df_copy.columns:
            df_copy[f'oil_price_lag_{i}'] = df_copy['oil_price'].shift(i)
        if 'gold_price' in df_copy.columns:
            df_copy[f'gold_price_lag_{i}'] = df_copy['gold_price'].shift(i)
        if 'interest_rate' in df_copy.columns:
            df_copy[f'interest_rate_lag_{i}'] = df_copy['interest_rate'].shift(i)
    
    # Add month and year as cyclical features
    if 'month' in df_copy.columns:
        df_copy['month_sin'] = np.sin(2 * np.pi * df_copy['month']/12)
        df_copy['month_cos'] = np.cos(2 * np.pi * df_copy['month']/12)
    
    # Create interaction features
    if all(col in df_copy.columns for col in ['oil_price', 'gold_price']):
        df_copy['oil_gold_ratio'] = df_copy['oil_price'] / df_copy['gold_price']
    
    # Drop rows with NaN values (due to lag features)
    df_clean = df_copy.dropna()
    
    return df_clean

def calculate_metrics(actual, predicted):
    """Calculate evaluation metrics"""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    
    return rmse, mae, mape, r2

def prepare_data(df, target_col, test_size=0.2):
    """
    Prepare data for hybrid modeling
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with features and target
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
        
    Returns:
    --------
    tuple
        Various data splits for different model components
    """
    # Define features and target
    X = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1)
    y = df[target_col]
    
    # Split data into training and testing sets (time-based split)
    split_test_idx = int(len(df) * (1 - test_size))
    
    # For time series models
    train_ts = y[:split_test_idx]
    test_ts = y[split_test_idx:]
    
    # For ML models
    X_train = X[:split_test_idx]
    X_test = X[split_test_idx:]
    y_train = y[:split_test_idx]
    y_test = y[split_test_idx:]
    
    # Scale features for ML models
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Convert to DataFrame for easier handling
    X_train_scaled_df = pd.DataFrame(X_train_scaled, index=X_train.index, columns=X_train.columns)
    X_test_scaled_df = pd.DataFrame(X_test_scaled, index=X_test.index, columns=X_test.columns)
    
    return train_ts, test_ts, X_train, X_test, y_train, y_test, X_train_scaled_df, X_test_scaled_df, scaler

def run_arima_component(train, test, seasonal=True):
    """
    Run ARIMA/SARIMA component of the hybrid model
    
    Parameters:
    -----------
    train : pd.Series
        Training data
    test : pd.Series
        Test data
    seasonal : bool
        Whether to include seasonal components
        
    Returns:
    --------
    tuple
        Model, predictions, residuals
    """
    logger.info(f"Running ARIMA component with seasonal={seasonal}")
    
    try:
        # Find best parameters using auto_arima
        if seasonal:
            model = auto_arima(
                train,
                start_p=0, start_q=0, max_p=3, max_q=3, max_d=2,
                start_P=0, start_Q=0, max_P=2, max_Q=2, max_D=1,
                m=12,  # Monthly seasonality
                seasonal=True,
                trace=True,
                error_action='ignore',
                suppress_warnings=True,
                stepwise=True,
                information_criterion='aic'
            )
        else:
            model = auto_arima(
                train,
                start_p=0, start_q=0, max_p=3, max_q=3, max_d=2,
                seasonal=False,
                trace=True,
                error_action='ignore',
                suppress_warnings=True,
                stepwise=True,
                information_criterion='aic'
            )
        
        # Print model summary
        logger.info(f"Best ARIMA parameters: {model.order}, seasonal_order: {model.seasonal_order if seasonal else None}")
        
        # Make forecasts
        forecast = model.predict(n_periods=len(test))
        forecast = pd.Series(forecast, index=test.index)
        
        # Get residuals
        residuals = model.resid()
        
        # Calculate metrics
        rmse, mae, mape, r2 = calculate_metrics(test, forecast)
        logger.info(f"ARIMA component - RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}")
        
        return model, forecast, residuals
    
    except Exception as e:
        logger.error(f"Error in ARIMA component: {e}")
        return None, None, None

def run_ml_component(X_train, X_test, y_train, y_test, X_train_scaled, X_test_scaled, model_type='XGBoost'):
    """
    Run machine learning component of the hybrid model
    
    Parameters:
    -----------
    X_train, X_test : pd.DataFrame
        Training and test features
    y_train, y_test : pd.Series
        Training and test targets
    X_train_scaled, X_test_scaled : pd.DataFrame
        Scaled training and test features
    model_type : str
        Type of ML model to use
        
    Returns:
    --------
    tuple
        Model, predictions
    """
    logger.info(f"Running ML component with model_type={model_type}")
    
    try:
        # Select model based on type
        if model_type == 'XGBoost':
            model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
            model.fit(X_train, y_train)
            forecast = model.predict(X_test)
        elif model_type == 'RandomForest':
            model = RandomForestRegressor(n_estimators=100, random_state=42)
            model.fit(X_train, y_train)
            forecast = model.predict(X_test)
        elif model_type == 'GradientBoosting':
            model = GradientBoostingRegressor(n_estimators=100, random_state=42)
            model.fit(X_train, y_train)
            forecast = model.predict(X_test)
        elif model_type == 'Ridge':
            model = Ridge(alpha=0.1, random_state=42)
            model.fit(X_train_scaled, y_train)
            forecast = model.predict(X_test_scaled)
        else:
            # Default to XGBoost
            model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
            model.fit(X_train, y_train)
            forecast = model.predict(X_test)
        
        # Convert to Series for easier handling
        forecast = pd.Series(forecast, index=X_test.index)
        
        # Calculate metrics
        rmse, mae, mape, r2 = calculate_metrics(y_test, forecast)
        logger.info(f"ML component ({model_type}) - RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}")
        
        return model, forecast
    
    except Exception as e:
        logger.error(f"Error in ML component: {e}")
        return None, None

def run_hybrid_model(df, target_col, test_size=0.2, arima_weight=0.5, ml_model_type='XGBoost', seasonal=True):
    """
    Run hybrid model combining ARIMA and ML components
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with features and target
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
    arima_weight : float
        Weight for ARIMA component in the ensemble (0-1)
    ml_model_type : str
        Type of ML model to use
    seasonal : bool
        Whether to include seasonal components in ARIMA
        
    Returns:
    --------
    tuple
        Hybrid model components, predictions, metrics
    """
    logger.info(f"Running hybrid model for {target_col} with arima_weight={arima_weight}, ml_model_type={ml_model_type}")
    
    # Prepare data
    train_ts, test_ts, X_train, X_test, y_train, y_test, X_train_scaled, X_test_scaled, scaler = prepare_data(
        df, target_col, test_size
    )
    
    # Run ARIMA component
    arima_model, arima_forecast, arima_residuals = run_arima_component(train_ts, test_ts, seasonal)
    
    # Run ML component
    ml_model, ml_forecast = run_ml_component(
        X_train, X_test, y_train, y_test, X_train_scaled, X_test_scaled, ml_model_type
    )
    
    # Combine forecasts
    if arima_forecast is not None and ml_forecast is not None:
        hybrid_forecast = arima_weight * arima_forecast + (1 - arima_weight) * ml_forecast
        
        # Calculate metrics
        rmse, mae, mape, r2 = calculate_metrics(test_ts, hybrid_forecast)
        logger.info(f"Hybrid model - RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}")
        
        # Plot results
        plt.figure(figsize=(14, 7))
        plt.plot(test_ts.index, test_ts, label='Actual', color='black', linewidth=2)
        plt.plot(arima_forecast.index, arima_forecast, label='ARIMA', linestyle='--', alpha=0.7)
        plt.plot(ml_forecast.index, ml_forecast, label=f'ML ({ml_model_type})', linestyle='--', alpha=0.7)
        plt.plot(hybrid_forecast.index, hybrid_forecast, label='Hybrid', color='red', linewidth=2)
        plt.title(f'Hybrid Model: Actual vs Predicted {target_col}')
        plt.xlabel('Date')
        plt.ylabel(target_col)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        
        # Create plots directory if it doesn't exist
        if not os.path.exists('plots'):
            os.makedirs('plots')
        
        plt.savefig(f'plots/hybrid_predictions_{target_col}.png')
        plt.close()
        
        # Plot error distribution
        plt.figure(figsize=(10, 6))
        errors = test_ts - hybrid_forecast
        sns.histplot(errors, kde=True)
        plt.title(f'Hybrid Model: Error Distribution for {target_col}')
        plt.xlabel('Error')
        plt.savefig(f'plots/hybrid_error_distribution_{target_col}.png')
        plt.close()
        
        # Create a DataFrame with predictions
        predictions = pd.DataFrame({
            'Actual': test_ts,
            'ARIMA': arima_forecast,
            f'ML ({ml_model_type})': ml_forecast,
            'Hybrid': hybrid_forecast,
            'Error': test_ts - hybrid_forecast
        })
        
        # Return results
        metrics = {
            'rmse': rmse,
            'mae': mae,
            'mape': mape,
            'r2': r2
        }
        
        components = {
            'arima_model': arima_model,
            'ml_model': ml_model,
            'arima_weight': arima_weight,
            'ml_model_type': ml_model_type,
            'scaler': scaler
        }
        
        return components, predictions, metrics
    
    else:
        logger.error("One or both model components failed")
        return None, None, None

def forecast_future_hybrid(components, df, target_col, forecast_horizon=24):
    """
    Generate future forecasts using the hybrid model
    
    Parameters:
    -----------
    components : dict
        Dictionary of hybrid model components
    df : pd.DataFrame
        Input dataframe with features
    target_col : str
        Name of the target column
    forecast_horizon : int
        Number of periods to forecast
        
    Returns:
    --------
    pd.Series
        Forecasted values
    """
    logger.info(f"Generating {forecast_horizon} period forecast")
    
    try:
        arima_model = components['arima_model']
        ml_model = components['ml_model']
        arima_weight = components['arima_weight']
        ml_model_type = components['ml_model_type']
        scaler = components['scaler']
        
        # Generate ARIMA forecasts
        arima_forecast = arima_model.predict(n_periods=forecast_horizon)
        
        # Get the last date in the dataframe
        last_date = df.index[-1]
        future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=forecast_horizon, freq='MS')
        
        # Create a Series with the ARIMA forecasts
        arima_forecast = pd.Series(arima_forecast, index=future_dates)
        
        # Generate ML forecasts (simplified approach)
        # For a real implementation, you would need to generate future features
        # Here we'll use a naive approach by using the last available features
        X_last = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).iloc[-1:].copy()
        ml_forecasts = []
        
        for i in range(forecast_horizon):
            # For tree-based models
            if ml_model_type in ['XGBoost', 'RandomForest', 'GradientBoosting']:
                pred = ml_model.predict(X_last)[0]
            # For linear models
            else:
                X_last_scaled = scaler.transform(X_last)
                pred = ml_model.predict(X_last_scaled)[0]
            
            ml_forecasts.append(pred)
        
        # Create a Series with the ML forecasts
        ml_forecast = pd.Series(ml_forecasts, index=future_dates)
        
        # Combine forecasts
        hybrid_forecast = arima_weight * arima_forecast + (1 - arima_weight) * ml_forecast
        
        # Plot future forecasts
        plt.figure(figsize=(14, 7))
        plt.plot(df[target_col].index, df[target_col], label='Historical Data')
        plt.plot(arima_forecast.index, arima_forecast, label='ARIMA Forecast', linestyle='--', alpha=0.7)
        plt.plot(ml_forecast.index, ml_forecast, label=f'ML ({ml_model_type}) Forecast', linestyle='--', alpha=0.7)
        plt.plot(hybrid_forecast.index, hybrid_forecast, label='Hybrid Forecast', color='red', linewidth=2)
        plt.title(f'Hybrid Model: {target_col} Future Forecast')
        plt.xlabel('Date')
        plt.ylabel(target_col)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(f'plots/hybrid_future_forecast_{target_col}.png')
        plt.close()
        
        return hybrid_forecast, arima_forecast, ml_forecast
    
    except Exception as e:
        logger.error(f"Error in future forecast: {e}")
        import traceback
        logger.error(traceback.format_exc())
        return None, None, None

def optimize_hybrid_weights(df, target_col, test_size=0.2, ml_model_type='XGBoost', seasonal=True):
    """
    Optimize the weights for the hybrid model
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with features and target
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
    ml_model_type : str
        Type of ML model to use
    seasonal : bool
        Whether to include seasonal components in ARIMA
        
    Returns:
    --------
    float
        Optimal weight for ARIMA component
    """
    logger.info(f"Optimizing hybrid weights for {target_col}")
    
    # Prepare data
    train_ts, test_ts, X_train, X_test, y_train, y_test, X_train_scaled, X_test_scaled, scaler = prepare_data(
        df, target_col, test_size
    )
    
    # Run ARIMA component
    arima_model, arima_forecast, arima_residuals = run_arima_component(train_ts, test_ts, seasonal)
    
    # Run ML component
    ml_model, ml_forecast = run_ml_component(
        X_train, X_test, y_train, y_test, X_train_scaled, X_test_scaled, ml_model_type
    )
    
    # Try different weights
    weights = np.linspace(0, 1, 21)  # 0.0, 0.05, 0.1, ..., 0.95, 1.0
    best_rmse = float('inf')
    best_weight = 0.5  # Default
    
    results = []
    
    for weight in weights:
        hybrid_forecast = weight * arima_forecast + (1 - weight) * ml_forecast
        rmse, mae, mape, r2 = calculate_metrics(test_ts, hybrid_forecast)
        results.append((weight, rmse, mae, mape, r2))
        
        if rmse < best_rmse:
            best_rmse = rmse
            best_weight = weight
    
    logger.info(f"Best weight for ARIMA component: {best_weight:.2f} with RMSE: {best_rmse:.4f}")
    
    # Plot weight optimization results
    plt.figure(figsize=(10, 6))
    weights_df = pd.DataFrame(results, columns=['Weight', 'RMSE', 'MAE', 'MAPE', 'R2'])
    plt.plot(weights_df['Weight'], weights_df['RMSE'], marker='o')
    plt.axvline(x=best_weight, color='r', linestyle='--', label=f'Best Weight: {best_weight:.2f}')
    plt.title(f'Hybrid Model Weight Optimization for {target_col}')
    plt.xlabel('ARIMA Weight')
    plt.ylabel('RMSE')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'plots/hybrid_weight_optimization_{target_col}.png')
    plt.close()
    
    return best_weight

def main():
    # Load data
    file_path = 'data/analyzed_time_series.csv'
    df = load_data(file_path)
    
    # Engineer features
    df_engineered = engineer_features(df)
    
    # Run hybrid model for CPI Year-over-Year
    target_col = 'cpi_yoy'
    
    # Optimize weights
    optimal_weight = optimize_hybrid_weights(df_engineered, target_col, test_size=0.2, ml_model_type='XGBoost')
    
    # Run hybrid model with optimal weight
    components_yoy, predictions_yoy, metrics_yoy = run_hybrid_model(
        df_engineered, target_col, test_size=0.2, arima_weight=optimal_weight, ml_model_type='XGBoost'
    )
    
    # Generate future forecasts for YoY
    if components_yoy is not None:
        forecast_yoy, arima_forecast_yoy, ml_forecast_yoy = forecast_future_hybrid(
            components_yoy, df_engineered, target_col, forecast_horizon=24
        )
        logger.info(f"\nFuture {target_col} forecasts:")
        logger.info(forecast_yoy)
    
    # Run hybrid model for CPI Month-over-Month
    target_col = 'cpi_mom'
    
    # Optimize weights
    optimal_weight = optimize_hybrid_weights(df_engineered, target_col, test_size=0.2, ml_model_type='XGBoost')
    
    # Run hybrid model with optimal weight
    components_mom, predictions_mom, metrics_mom = run_hybrid_model(
        df_engineered, target_col, test_size=0.2, arima_weight=optimal_weight, ml_model_type='XGBoost'
    )
    
    # Generate future forecasts for MoM
    if components_mom is not None:
        forecast_mom, arima_forecast_mom, ml_forecast_mom = forecast_future_hybrid(
            components_mom, df_engineered, target_col, forecast_horizon=24
        )
        logger.info(f"\nFuture {target_col} forecasts:")
        logger.info(forecast_mom)
    
    # Compare with other models (if available)
    try:
        # Try to load metrics from other models
        all_metrics = pd.read_csv('plots/final_model_comparison_metrics.csv')
        
        # Create hybrid metrics dataframe
        hybrid_metrics = pd.DataFrame([
            {
                'Model': 'Hybrid', 'Target': 'CPI MoM',
                'Train_RMSE': None, 'Train_MAE': None, 
                'Train_MAPE': None, 'Train_R2': None,
                'Test_RMSE': metrics_mom['rmse'], 'Test_MAE': metrics_mom['mae'], 
                'Test_MAPE': metrics_mom['mape'], 'Test_R2': metrics_mom['r2']
            },
            {
                'Model': 'Hybrid', 'Target': 'CPI YoY',
                'Train_RMSE': None, 'Train_MAE': None, 
                'Train_MAPE': None, 'Train_R2': None,
                'Test_RMSE': metrics_yoy['rmse'], 'Test_MAE': metrics_yoy['mae'], 
                'Test_MAPE': metrics_yoy['mape'], 'Test_R2': metrics_yoy['r2']
            }
        ])
        
        # Combine metrics
        combined_metrics = pd.concat([all_metrics, hybrid_metrics])
        combined_metrics.to_csv('plots/final_model_comparison_metrics.csv', index=False)
        logger.info("\nComparison with all models saved to plots/final_model_comparison_metrics.csv")
        
    except Exception as e:
        logger.warning(f"Could not compare with other models: {e}")
        # Save metrics to CSV
        metrics_data = [
            {
                'Model': 'Hybrid', 'Target': 'CPI MoM',
                'Test_RMSE': metrics_mom['rmse'], 'Test_MAE': metrics_mom['mae'], 
                'Test_MAPE': metrics_mom['mape'], 'Test_R2': metrics_mom['r2']
            },
            {
                'Model': 'Hybrid', 'Target': 'CPI YoY',
                'Test_RMSE': metrics_yoy['rmse'], 'Test_MAE': metrics_yoy['mae'], 
                'Test_MAPE': metrics_yoy['mape'], 'Test_R2': metrics_yoy['r2']
            }
        ]
        
        metrics_df = pd.DataFrame(metrics_data)
        metrics_df.to_csv('plots/hybrid_metrics.csv', index=False)
        logger.info("\nMetrics saved to plots/hybrid_metrics.csv")

if __name__ == "__main__":
    main()

2025-05-09 23:26:46,908 - INFO - Optimizing hybrid weights for cpi_yoy
2025-05-09 23:26:46,918 - INFO - Running ARIMA component with seasonal=True


Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=771.285, Time=0.03 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=516.945, Time=0.18 sec
 ARIMA(0,1,1)(0,0,1)[12] intercept   : AIC=523.336, Time=0.22 sec
 ARIMA(0,1,0)(0,0,0)[12]             : AIC=769.294, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[12] intercept   : AIC=590.401, Time=0.06 sec
 ARIMA(1,1,0)(2,0,0)[12] intercept   : AIC=489.897, Time=0.53 sec
 ARIMA(1,1,0)(2,0,1)[12] intercept   : AIC=471.281, Time=0.90 sec
 ARIMA(1,1,0)(1,0,1)[12] intercept   : AIC=470.575, Time=0.25 sec
 ARIMA(1,1,0)(0,0,1)[12] intercept   : AIC=469.818, Time=0.31 sec
 ARIMA(1,1,0)(0,0,2)[12] intercept   : AIC=470.313, Time=0.59 sec
 ARIMA(1,1,0)(1,0,2)[12] intercept   : AIC=inf, Time=1.64 sec
 ARIMA(0,1,0)(0,0,1)[12] intercept   : AIC=654.754, Time=0.19 sec
 ARIMA(2,1,0)(0,0,1)[12] intercept   : AIC=471.498, Time=0.28 sec
 ARIMA(1,1,1)(0,0,1)[12] intercept   : AIC=471.366, Time=0.53 sec
 ARIMA(2,1,1)(0,0,1)[12] intercept   

2025-05-09 23:26:57,535 - INFO - Best ARIMA parameters: (1, 1, 0), seasonal_order: (0, 0, 1, 12)
2025-05-09 23:26:57,547 - INFO - ARIMA component - RMSE: 1.5938, MAE: 1.2600, MAPE: 1.23%, R²: -0.7079
2025-05-09 23:26:57,548 - INFO - Running ML component with model_type=XGBoost


 ARIMA(2,1,1)(0,0,1)[12]             : AIC=469.363, Time=0.41 sec

Best model:  ARIMA(1,1,0)(0,0,1)[12]          
Total fit time: 10.585 seconds


2025-05-09 23:26:59,771 - INFO - ML component (XGBoost) - RMSE: 0.0808, MAE: 0.0510, MAPE: 0.05%, R²: 0.9956
2025-05-09 23:26:59,843 - INFO - Best weight for ARIMA component: 0.00 with RMSE: 0.0808
2025-05-09 23:27:00,067 - INFO - Running hybrid model for cpi_yoy with arima_weight=0.0, ml_model_type=XGBoost
2025-05-09 23:27:00,093 - INFO - Running ARIMA component with seasonal=True


Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=771.285, Time=0.03 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=516.945, Time=0.18 sec
 ARIMA(0,1,1)(0,0,1)[12] intercept   : AIC=523.336, Time=0.22 sec
 ARIMA(0,1,0)(0,0,0)[12]             : AIC=769.294, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[12] intercept   : AIC=590.401, Time=0.14 sec
 ARIMA(1,1,0)(2,0,0)[12] intercept   : AIC=489.897, Time=0.59 sec
 ARIMA(1,1,0)(2,0,1)[12] intercept   : AIC=471.281, Time=0.84 sec
 ARIMA(1,1,0)(1,0,1)[12] intercept   : AIC=470.575, Time=0.25 sec
 ARIMA(1,1,0)(0,0,1)[12] intercept   : AIC=469.818, Time=0.28 sec
 ARIMA(1,1,0)(0,0,2)[12] intercept   : AIC=470.313, Time=0.59 sec
 ARIMA(1,1,0)(1,0,2)[12] intercept   : AIC=inf, Time=1.86 sec
 ARIMA(0,1,0)(0,0,1)[12] intercept   : AIC=654.754, Time=0.18 sec
 ARIMA(2,1,0)(0,0,1)[12] intercept   : AIC=471.498, Time=0.32 sec
 ARIMA(1,1,1)(0,0,1)[12] intercept   : AIC=471.366, Time=0.51 sec
 ARIMA(2,1,1)(0,0,1)[12] intercept   

2025-05-09 23:27:10,652 - INFO - Best ARIMA parameters: (1, 1, 0), seasonal_order: (0, 0, 1, 12)
2025-05-09 23:27:10,666 - INFO - ARIMA component - RMSE: 1.5938, MAE: 1.2600, MAPE: 1.23%, R²: -0.7079
2025-05-09 23:27:10,666 - INFO - Running ML component with model_type=XGBoost


 ARIMA(2,1,1)(0,0,1)[12]             : AIC=469.363, Time=0.41 sec

Best model:  ARIMA(1,1,0)(0,0,1)[12]          
Total fit time: 10.521 seconds


2025-05-09 23:27:10,927 - INFO - ML component (XGBoost) - RMSE: 0.0808, MAE: 0.0510, MAPE: 0.05%, R²: 0.9956
2025-05-09 23:27:10,932 - INFO - Hybrid model - RMSE: 0.0808, MAE: 0.0510, MAPE: 0.05%, R²: 0.9956
2025-05-09 23:27:11,348 - INFO - Generating 24 period forecast
2025-05-09 23:27:12,010 - INFO - 
Future cpi_yoy forecasts:
2025-05-09 23:27:12,012 - INFO - 2025-01-01   NaN
2025-02-01   NaN
2025-03-01   NaN
2025-04-01   NaN
2025-05-01   NaN
2025-06-01   NaN
2025-07-01   NaN
2025-08-01   NaN
2025-09-01   NaN
2025-10-01   NaN
2025-11-01   NaN
2025-12-01   NaN
2026-01-01   NaN
2026-02-01   NaN
2026-03-01   NaN
2026-04-01   NaN
2026-05-01   NaN
2026-06-01   NaN
2026-07-01   NaN
2026-08-01   NaN
2026-09-01   NaN
2026-10-01   NaN
2026-11-01   NaN
2026-12-01   NaN
Freq: MS, dtype: float64
2025-05-09 23:27:12,015 - INFO - Optimizing hybrid weights for cpi_mom
2025-05-09 23:27:12,026 - INFO - Running ARIMA component with seasonal=True


Performing stepwise search to minimize aic
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=650.475, Time=0.05 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=inf, Time=1.23 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=546.290, Time=0.15 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=3245.698, Time=0.02 sec
 ARIMA(0,0,1)(0,0,0)[12] intercept   : AIC=582.149, Time=0.07 sec
 ARIMA(0,0,1)(1,0,1)[12] intercept   : AIC=548.279, Time=0.80 sec
 ARIMA(0,0,1)(0,0,2)[12] intercept   : AIC=533.499, Time=0.45 sec
 ARIMA(0,0,1)(1,0,2)[12] intercept   : AIC=inf, Time=3.00 sec
 ARIMA(0,0,0)(0,0,2)[12] intercept   : AIC=625.746, Time=0.24 sec
 ARIMA(1,0,1)(0,0,2)[12] intercept   : AIC=496.220, Time=1.86 sec
 ARIMA(1,0,1)(0,0,1)[12] intercept   : AIC=515.101, Time=0.76 sec
 ARIMA(1,0,1)(1,0,2)[12] intercept   : AIC=inf, Time=3.93 sec
 ARIMA(1,0,1)(1,0,1)[12] intercept   : AIC=503.227, Time=0.91 sec
 ARIMA(1,0,0)(0,0,2)[12] intercept   : AIC=494.641, Time=1.35 sec
 ARIMA(1,0,0)(0,0,1)[12] intercept   : AIC=5

2025-05-09 23:27:36,814 - INFO - Best ARIMA parameters: (1, 0, 0), seasonal_order: (0, 0, 2, 12)
2025-05-09 23:27:36,831 - INFO - ARIMA component - RMSE: 0.5176, MAE: 0.3955, MAPE: 0.39%, R²: -0.1886
2025-05-09 23:27:36,832 - INFO - Running ML component with model_type=XGBoost


 ARIMA(1,0,0)(0,0,2)[12]             : AIC=552.384, Time=0.23 sec

Best model:  ARIMA(1,0,0)(0,0,2)[12] intercept
Total fit time: 24.762 seconds


2025-05-09 23:27:37,166 - INFO - ML component (XGBoost) - RMSE: 0.0784, MAE: 0.0203, MAPE: 0.02%, R²: 0.9728
2025-05-09 23:27:37,239 - INFO - Best weight for ARIMA component: 0.00 with RMSE: 0.0784
2025-05-09 23:27:37,415 - INFO - Running hybrid model for cpi_mom with arima_weight=0.0, ml_model_type=XGBoost
2025-05-09 23:27:37,426 - INFO - Running ARIMA component with seasonal=True


Performing stepwise search to minimize aic
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=650.475, Time=0.05 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=inf, Time=1.18 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=546.290, Time=0.18 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=3245.698, Time=0.02 sec
 ARIMA(0,0,1)(0,0,0)[12] intercept   : AIC=582.149, Time=0.07 sec
 ARIMA(0,0,1)(1,0,1)[12] intercept   : AIC=548.279, Time=0.84 sec
 ARIMA(0,0,1)(0,0,2)[12] intercept   : AIC=533.499, Time=0.40 sec
 ARIMA(0,0,1)(1,0,2)[12] intercept   : AIC=inf, Time=3.13 sec
 ARIMA(0,0,0)(0,0,2)[12] intercept   : AIC=625.746, Time=0.23 sec
 ARIMA(1,0,1)(0,0,2)[12] intercept   : AIC=496.220, Time=1.91 sec
 ARIMA(1,0,1)(0,0,1)[12] intercept   : AIC=515.101, Time=0.86 sec
 ARIMA(1,0,1)(1,0,2)[12] intercept   : AIC=inf, Time=3.87 sec
 ARIMA(1,0,1)(1,0,1)[12] intercept   : AIC=503.227, Time=1.03 sec
 ARIMA(1,0,0)(0,0,2)[12] intercept   : AIC=494.641, Time=1.40 sec
 ARIMA(1,0,0)(0,0,1)[12] intercept   : AIC=5

2025-05-09 23:28:02,430 - INFO - Best ARIMA parameters: (1, 0, 0), seasonal_order: (0, 0, 2, 12)
2025-05-09 23:28:02,445 - INFO - ARIMA component - RMSE: 0.5176, MAE: 0.3955, MAPE: 0.39%, R²: -0.1886
2025-05-09 23:28:02,446 - INFO - Running ML component with model_type=XGBoost


 ARIMA(1,0,0)(0,0,2)[12]             : AIC=552.384, Time=0.22 sec

Best model:  ARIMA(1,0,0)(0,0,2)[12] intercept
Total fit time: 24.980 seconds


2025-05-09 23:28:02,738 - INFO - ML component (XGBoost) - RMSE: 0.0784, MAE: 0.0203, MAPE: 0.02%, R²: 0.9728
2025-05-09 23:28:02,743 - INFO - Hybrid model - RMSE: 0.0784, MAE: 0.0203, MAPE: 0.02%, R²: 0.9728
2025-05-09 23:28:03,229 - INFO - Generating 24 period forecast
2025-05-09 23:28:03,859 - INFO - 
Future cpi_mom forecasts:
2025-05-09 23:28:03,860 - INFO - 2025-01-01   NaN
2025-02-01   NaN
2025-03-01   NaN
2025-04-01   NaN
2025-05-01   NaN
2025-06-01   NaN
2025-07-01   NaN
2025-08-01   NaN
2025-09-01   NaN
2025-10-01   NaN
2025-11-01   NaN
2025-12-01   NaN
2026-01-01   NaN
2026-02-01   NaN
2026-03-01   NaN
2026-04-01   NaN
2026-05-01   NaN
2026-06-01   NaN
2026-07-01   NaN
2026-08-01   NaN
2026-09-01   NaN
2026-10-01   NaN
2026-11-01   NaN
2026-12-01   NaN
Freq: MS, dtype: float64
2025-05-09 23:28:03,867 - INFO - 
Metrics saved to plots/hybrid_metrics.csv
