In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from pycaret.time_series import TSForecastingExperiment
import plotly.graph_objects as go
import math
from plotly.subplots import make_subplots
from datetime import datetime
import sys
from pathlib import Path

# Add project root to path (assuming notebook is in the notebooks directory)
project_root = Path().absolute().parent
sys.path.append(str(project_root))

# Local imports
from src.core.bloomberg_fetcher import fetch_bloomberg_data
from src import trans_utils  # Using the one from src root since that's what you have open
from src.utils import merge_utils  # Using the one from utils since that's what you have open
from src.utils.csv_exporter import export_to_csv

In [2]:
# Market / macro inputs: each (Bloomberg ticker, field) pair is mapped to the
# column name it should carry in the fetched frame.
mapping = {
    ('I05510CA Index', 'INDEX_OAS_TSY_BP'): 'cad_oas',
    ('LF98TRUU Index', 'INDEX_OAS_TSY_BP'): 'us_hy_oas',
    ('LUACTRUU Index', 'INDEX_OAS_TSY_BP'): 'us_ig_oas',
    ('SPTSX Index', 'PX_LAST'): 'tsx',
    ('VIX Index', 'PX_LAST'): 'vix',
    # NOTE(review): the ticker reads like a 3m/30y curve while the column is
    # named us_3m_10y — confirm which tenor is intended.
    ('USYC3M30 Index', 'PX_LAST'): 'us_3m_10y',
    ('BCMPUSGR Index', 'PX_LAST'): 'us_growth_surprises',
    ('BCMPUSIF Index', 'PX_LAST'): 'us_inflation_surprises',
    # NOTE(review): there are two spaces between "YOY" and "Index" — confirm
    # this matches the ticker exactly.
    ('LEI YOY  Index', 'PX_LAST'): 'us_lei_yoy',
    ('.HARDATA G Index', 'PX_LAST'): 'us_hard_data_surprises',
    ('CGERGLOB Index', 'PX_LAST'): 'us_equity_revisions',
    ('.ECONREGI G Index', 'PX_LAST'): 'us_economic_regime',

}

# Date window: fixed start, end is "today" at execution time, so re-running
# the cell on a later day requests a longer sample.
end_date = datetime.now().strftime('%Y-%m-%d')
start_date ='2002-01-01'

# Fetch all series with periodicity 'M' and drop every row that has at least
# one missing value.
df = fetch_bloomberg_data(
    mapping=mapping,
    start_date=start_date,
    end_date=end_date,
    periodicity='M',
    align_start=True
).dropna()

# Year-to-date excess-return fields for the three credit indices, keyed the
# same way as `mapping` above: (ticker, field) -> column name.
mapping1 = {
    ('I05510CA Index', 'INDEX_EXCESS_RETURN_YTD'): 'cad_ig_er',
    ('LF98TRUU Index', 'INDEX_EXCESS_RETURN_YTD'): 'us_hy_er',
    ('LUACTRUU Index', 'INDEX_EXCESS_RETURN_YTD'): 'us_ig_er',
}

# Fetch the excess-return series over the same window and drop incomplete rows.
df1 = fetch_bloomberg_data(
    mapping=mapping1,
    start_date=start_date,
    end_date=end_date,
    periodicity='M',
    align_start=True
).dropna()

# Convert the YTD excess-return series to an index, then merge it with the
# market/macro frame.
# NOTE(review): the later code refers to columns named '<name>_index', so the
# converter presumably appends that suffix — confirm in trans_utils.
df2= trans_utils.convert_er_ytd_to_index(df1[['cad_ig_er','us_hy_er','us_ig_er']])
final_df=merge_utils.merge_dfs(df, df2, fill='ffill', start_date_align='yes')

# Patch the known-bad cad_oas observation dated 2005-11-15 by carrying the
# most recent earlier value forward. The patch only applies when that exact
# date is present in the index.
bad_date = '2005-11-15'
if bad_date in final_df.index:
    earlier_cad_oas = final_df.loc[final_df.index < bad_date, 'cad_oas']
    # Guard against the bad date being the first row: there is nothing to
    # carry forward in that case, so leave the value untouched instead of
    # failing with an IndexError.
    if not earlier_cad_oas.empty:
        final_df.loc[bad_date, 'cad_oas'] = earlier_cad_oas.iloc[-1]

# The excess-return index columns are not used as model features, so they are
# dropped outright. (They used to be filled with 100 immediately before being
# dropped; that fill had no effect on the result and has been removed.)
final_df = final_df.drop(['cad_ig_er_index', 'us_hy_er_index', 'us_ig_er_index'], axis=1)

# TSX percent change over 1, 3, 6 and 12 rows, expressed in percent. The
# columns are created in this order: tsx_1m, tsx_3m, tsx_6m, tsx_12m.
for lookback in (1, 3, 6, 12):
    final_df[f'tsx_{lookback}m'] = final_df['tsx'].pct_change(periods=lookback) * 100

# The level itself is not a feature; only its changes are kept.
final_df = final_df.drop('tsx', axis=1)

# pct_change leaves NaN in the leading rows (up to 12); drop them.
final_df = final_df.dropna()

# Export the final DataFrame to CSV, overwriting any existing file.
# NOTE(review): the export directory is an absolute, user-specific path; the
# modelling cells read the file back from the same location.
export_path = export_to_csv(
    data=final_df,
    name='monthly_oas_pycaret',
    export_dir='c:/Users/Eddy/Documents/auto_ml/csv_outputs',
    overwrite=True  # Explicitly set to overwrite existing file
)

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that emitted a stray "None" line).
final_df.info()
print('-----------------------------')
print('-----------------------------')
print(final_df.head())
print('-----------------------------')
print('-----------------------------')
print(final_df.tail())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 331 entries, 2003-08-29 to 2024-12-31
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   cad_oas                 331 non-null    float64
 1   us_hy_oas               331 non-null    float64
 2   us_ig_oas               331 non-null    float64
 3   vix                     331 non-null    float64
 4   us_3m_10y               331 non-null    float64
 5   us_growth_surprises     331 non-null    float64
 6   us_inflation_surprises  331 non-null    float64
 7   us_lei_yoy              331 non-null    float64
 8   us_hard_data_surprises  331 non-null    float64
 9   us_equity_revisions     331 non-null    float64
 10  us_economic_regime      331 non-null    float64
 11  tsx_1m                  331 non-null    float64
 12  tsx_3m                  331 non-null    float64
 13  tsx_6m                  331 non-null    float64
 14  tsx_12m                

In [10]:
import pandas as pd
from pycaret.time_series import *
import numpy as np
from datetime import datetime
import logging
import os

# Configure logging: records at INFO level and above, each rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

def load_and_prepare_data(file_path):
    """
    Load the exported feature CSV and put it on a regular month-end index.

    The CSV must contain a ``Date`` column. Rows are sorted chronologically
    and then re-indexed onto calendar month-ends; each month-end takes the
    most recent observation at or before it (forward fill), so several
    observations inside one month collapse to the last one.

    Args:
        file_path: Path to the CSV file containing the data

    Returns:
        pd.DataFrame: Frame indexed by ``Date`` with month-end frequency

    Raises:
        Exception: Any error raised while reading or reshaping the file is
            logged and re-raised unchanged.
    """
    try:
        logging.info(f"Loading data from {file_path}")
        raw = pd.read_csv(file_path)

        monthly = (
            raw
            # Parse the Date column and use it as the (sorted) index
            .assign(Date=pd.to_datetime(raw['Date']))
            .set_index('Date')
            .sort_index()
            # Use an explicit MonthEnd offset rather than the 'M' string
            # alias: the alias is deprecated in recent pandas releases,
            # while the offset object means the same thing in every version.
            .asfreq(pd.offsets.MonthEnd(), method='ffill')
        )

        logging.info(f"Data loaded successfully. Shape: {monthly.shape}")
        return monthly
    except Exception as e:
        logging.error(f"Error loading data: {str(e)}")
        raise

def train_predict_model(data):
    """
    Set up the forecasting experiment, rank candidate models and refit the winner.

    The experiment targets ``cad_oas`` with a one-step horizon and three
    expanding folds. Candidates are ranked by MASE, the comparison table is
    captured, and the top model is finalized.

    Args:
        data: DataFrame with the time series data (must contain ``cad_oas``)

    Returns:
        tuple: (final_model, model_metrics) - the finalized best model and the
        table retrieved with ``pull()`` right after the comparison

    Raises:
        Exception: Any error from setup, comparison or finalization is logged
            and re-raised unchanged.
    """
    # Only the ML models known to work with this setup are compared
    candidate_ids = [
        # Linear models
        'lr_cds_dt', 'en_cds_dt', 'ridge_cds_dt', 'lasso_cds_dt', 'br_cds_dt',
        # Other regression models
        'huber_cds_dt', 'omp_cds_dt', 'knn_cds_dt',
        # Tree models
        'dt_cds_dt', 'rf_cds_dt', 'et_cds_dt', 'gbr_cds_dt',
        # Boosting models
        'ada_cds_dt', 'lightgbm_cds_dt', 'catboost_cds_dt',
    ]

    try:
        logging.info("Initializing model setup")
        setup(
            data=data,
            target='cad_oas',
            fh=1,
            fold=3,
            seasonal_period=12,
            fold_strategy='expanding',
            transform_target=None,
            session_id=123,
            verbose=True
        )

        logging.info("Training and comparing models")
        winner = compare_models(sort='MASE', include=candidate_ids)

        # Capture the comparison table before anything else replaces it
        logging.info("Getting model performance metrics")
        leaderboard = pull()

        logging.info("Finalizing best model")
        return finalize_model(winner), leaderboard
    except Exception as e:
        logging.error(f"Error in model training/prediction: {str(e)}")
        raise

def analyze_model_performance(model, data):
    """
    Print the latest metrics table, forecast one step and report the result.

    The forecast is written to ``csv_outputs/cad_oas_predictions.csv``
    (relative to the working directory; the folder is created if needed), and
    the last observed ``cad_oas`` is printed next to the predicted value.

    Args:
        model: Trained model from PyCaret
        data: Training data DataFrame (must contain ``cad_oas``)

    Raises:
        Exception: Any error is logged and re-raised unchanged.
    """
    csv_path = 'csv_outputs/cad_oas_predictions.csv'

    try:
        # Whatever table pull() currently holds is shown as the metrics
        score_grid = pull()
        print("\nModel Performance Metrics:")
        print(score_grid)

        # NOTE(review): the regressors handed to the model are the last
        # observed row, i.e. dated at the final timestamp in `data` rather
        # than at the forecast month - confirm this is the intended stand-in
        # for next month's exogenous values.
        latest_exog = data.drop(columns='cad_oas').tail(1).copy()

        logging.info("Making predictions")
        forecast = predict_model(model, X=latest_exog)

        # Persist the forecast, creating the output folder on first use
        os.makedirs(os.path.dirname(csv_path), exist_ok=True)
        forecast.to_csv(csv_path)
        logging.info(f"Predictions saved to {os.path.abspath(csv_path)}")

        last_actual = data['cad_oas'].iloc[-1]
        # The prediction is read from the last column, last row
        next_pred = forecast.iloc[:, -1].iloc[-1]

        print("\nPrediction Results:")
        print(f"Last actual value: {last_actual:.4f}")
        print(f"Predicted next value: {next_pred:.4f}")

    except Exception as e:
        logging.error(f"Error analyzing model performance: {str(e)}")
        raise

def main(file_path='c:/Users/Eddy/Documents/auto_ml/csv_outputs/monthly_oas_pycaret.csv'):
    """
    Main function to orchestrate the model training and prediction process

    Args:
        file_path: CSV file to load. Defaults to the location this notebook
            originally hard-coded, so calling ``main()`` with no arguments
            behaves as before; pass another path to run on a different
            machine or file.

    Raises:
        Exception: Any error from the pipeline steps is logged and re-raised
            unchanged.
    """
    try:
        # Load and prepare data
        data = load_and_prepare_data(file_path)

        # Train model; the returned metrics table is not used here
        model, _metrics = train_predict_model(data)

        # Analyze model performance
        analyze_model_performance(model, data)

    except Exception as e:
        logging.error(f"Error in main execution: {str(e)}")
        raise

# Run the pipeline when this code executes as the top-level module.
if __name__ == "__main__":
    main()

2025-01-25 19:55:49,949 - INFO - Loading data from c:/Users/Eddy/Documents/auto_ml/csv_outputs/monthly_oas_pycaret.csv
2025-01-25 19:55:49,956 - INFO - Data loaded successfully. Shape: (257, 15)
2025-01-25 19:55:49,956 - INFO - Initializing model setup


Unnamed: 0,Description,Value
0,session_id,123
1,Target,cad_oas
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(257, 15)"
5,Transformed data shape,"(257, 15)"
6,Transformed train set shape,"(256, 15)"
7,Transformed test set shape,"(1, 15)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


2025-01-25 19:55:50,292 - INFO - Training and comparing models


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.2917,0.1638,2.5009,2.5009,0.0243,0.0237,0.04
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.3594,0.2018,3.0829,3.0829,0.0292,0.0286,1.4667
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.3619,0.2032,3.1046,3.1046,0.0295,0.0289,1.4033
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.3884,0.218,3.3318,3.3318,0.0315,0.0309,1.45
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.5248,0.2947,4.5025,4.5025,0.0427,0.0415,0.1633
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.5449,0.306,4.6738,4.6738,0.0447,0.0433,0.06
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.6062,0.3402,5.2026,5.2026,0.0479,0.0467,0.0467
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.6357,0.3568,5.4576,5.4576,0.0493,0.0508,1.3533
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.6809,0.3822,5.8424,5.8424,0.0545,0.0529,1.44
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,0.7633,0.4284,6.5509,6.5509,0.0604,0.0586,0.8


2025-01-25 19:56:29,247 - INFO - Getting model performance metrics
2025-01-25 19:56:29,247 - INFO - Finalizing best model
2025-01-25 19:56:29,636 - INFO - Making predictions



Model Performance Metrics:
                                                             Model    MASE  \
ada_cds_dt            AdaBoost w/ Cond. Deseasonalize & Detrending  0.2917   
br_cds_dt        Bayesian Ridge w/ Cond. Deseasonalize & Detren...  0.3594   
en_cds_dt          Elastic Net w/ Cond. Deseasonalize & Detrending  0.3619   
lasso_cds_dt             Lasso w/ Cond. Deseasonalize & Detrending  0.3884   
lightgbm_cds_dt  Light Gradient Boosting w/ Cond. Deseasonalize...  0.5248   
gbr_cds_dt       Gradient Boosting w/ Cond. Deseasonalize & Det...  0.5449   
et_cds_dt          Extra Trees w/ Cond. Deseasonalize & Detrending  0.6062   
omp_cds_dt       Orthogonal Matching Pursuit w/ Cond. Deseasona...  0.6357   
huber_cds_dt             Huber w/ Cond. Deseasonalize & Detrending  0.6809   
catboost_cds_dt  CatBoost Regressor w/ Cond. Deseasonalize & De...  0.7633   
dt_cds_dt        Decision Tree w/ Cond. Deseasonalize & Detrending  0.7845   
rf_cds_dt        Random Forest w/ Co

2025-01-25 19:56:29,815 - INFO - Predictions saved to c:\Users\Eddy\Documents\auto_ml\notebooks\csv_outputs\cad_oas_predictions.csv



Prediction Results:
Last actual value: 98.5117
Predicted next value: 101.9598


In [14]:
import pandas as pd
from pycaret.time_series import *
import numpy as np
from datetime import datetime
import logging
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Configure logging: records at INFO level and above, each rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

def load_and_prepare_data(file_path):
    """
    Load the exported feature CSV and put it on a regular month-end index.

    The CSV must contain a ``Date`` column. Rows are sorted chronologically
    and then re-indexed onto calendar month-ends; each month-end takes the
    most recent observation at or before it (forward fill), so several
    observations inside one month collapse to the last one.

    Args:
        file_path: Path to the CSV file containing the data

    Returns:
        pd.DataFrame: Frame indexed by ``Date`` with month-end frequency

    Raises:
        Exception: Any error raised while reading or reshaping the file is
            logged and re-raised unchanged.
    """
    try:
        logging.info(f"Loading data from {file_path}")
        raw = pd.read_csv(file_path)

        monthly = (
            raw
            # Parse the Date column and use it as the (sorted) index
            .assign(Date=pd.to_datetime(raw['Date']))
            .set_index('Date')
            .sort_index()
            # Use an explicit MonthEnd offset rather than the 'M' string
            # alias: the alias is deprecated in recent pandas releases,
            # while the offset object means the same thing in every version.
            .asfreq(pd.offsets.MonthEnd(), method='ffill')
        )

        logging.info(f"Data loaded successfully. Shape: {monthly.shape}")
        return monthly
    except Exception as e:
        logging.error(f"Error loading data: {str(e)}")
        raise

def train_predict_model(data):
    """
    Set up the forecasting experiment, rank candidate models and refit the winner.

    The experiment targets ``cad_oas`` with a one-step horizon and three
    expanding folds. Candidates are ranked by MASE, the comparison table is
    captured, and the top model is finalized.

    Args:
        data: DataFrame with the time series data (must contain ``cad_oas``)

    Returns:
        tuple: (final_model, model_metrics) - the finalized best model and the
        table retrieved with ``pull()`` right after the comparison

    Raises:
        Exception: Any error from setup, comparison or finalization is logged
            and re-raised unchanged.
    """
    # Only the ML models known to work with this setup are compared
    candidate_ids = [
        # Linear models
        'lr_cds_dt', 'en_cds_dt', 'ridge_cds_dt', 'lasso_cds_dt', 'br_cds_dt',
        # Other regression models
        'huber_cds_dt', 'omp_cds_dt', 'knn_cds_dt',
        # Tree models
        'dt_cds_dt', 'rf_cds_dt', 'et_cds_dt', 'gbr_cds_dt',
        # Boosting models
        'ada_cds_dt', 'lightgbm_cds_dt', 'catboost_cds_dt',
    ]

    try:
        logging.info("Initializing model setup")
        setup(
            data=data,
            target='cad_oas',
            fh=1,
            fold=3,
            seasonal_period=12,
            fold_strategy='expanding',
            transform_target=None,
            session_id=123,
            verbose=True
        )

        logging.info("Training and comparing models")
        winner = compare_models(sort='MASE', include=candidate_ids)

        # Capture the comparison table before anything else replaces it
        logging.info("Getting model performance metrics")
        leaderboard = pull()

        logging.info("Finalizing best model")
        return finalize_model(winner), leaderboard
    except Exception as e:
        logging.error(f"Error in model training/prediction: {str(e)}")
        raise

def _model_importance_scores(model, n_features):
    """
    Read one score per feature directly off the model, when that is possible.

    Args:
        model: Object that may expose ``feature_importances_`` or ``coef_``
        n_features: Number of scores required (one per feature column)

    Returns:
        Flat array of ``n_features`` scores, or None when the model exposes
        neither attribute or the number of scores does not match
        ``n_features`` (the caller then falls back to correlations).
    """
    if hasattr(model, 'feature_importances_'):
        # For tree-based models
        scores = np.ravel(model.feature_importances_)
    elif hasattr(model, 'coef_'):
        # For linear models; coef_ may be 2-D, so flatten before use
        scores = np.abs(np.ravel(model.coef_))
    else:
        return None

    # A score vector of a different length cannot be paired with the feature
    # names, so it is rejected here rather than crashing the DataFrame
    # constructor.
    if len(scores) != n_features:
        logging.warning(
            f"Model exposes {len(scores)} importance scores for {n_features} "
            "features; falling back to correlation analysis"
        )
        return None
    return scores


def analyze_feature_importance(model, data):
    """
    Analyze and visualize feature importance and relationships

    Scores come from the model's ``feature_importances_`` or ``coef_`` when
    they are available and line up with the feature columns; otherwise the
    absolute correlation of each feature with ``cad_oas`` is used. A bar chart,
    a scatter plot of the top three features, and two CSV files are written
    to ``analysis_outputs/`` (created if missing, relative to the working
    directory).

    Args:
        model: Trained model from PyCaret
        data: Training data DataFrame (must contain ``cad_oas``)

    Returns:
        pd.DataFrame: Feature importance scores (columns ``Feature`` and
        ``Importance``)

    Raises:
        Exception: Any error is logged and re-raised unchanged.
    """
    try:
        # Create output directory if it doesn't exist
        os.makedirs('analysis_outputs', exist_ok=True)

        feature_names = [col for col in data.columns if col != 'cad_oas']

        # Computed once and reused for both the fallback and the report below
        target_corr = data.corr()['cad_oas']

        scores = _model_importance_scores(model, len(feature_names))
        if scores is not None:
            importance = pd.DataFrame({
                'Feature': feature_names,
                'Importance': scores
            })
        else:
            # Model doesn't support direct feature importance:
            # use correlation analysis instead
            feature_corr = target_corr.drop('cad_oas')
            importance = pd.DataFrame({
                'Feature': feature_corr.index,
                'Importance': np.abs(feature_corr.values)
            })

        # Plot feature importance
        plt.figure(figsize=(12, 6))
        sns.barplot(x='Importance', y='Feature',
                    data=importance.sort_values('Importance', ascending=True))
        plt.title('Feature Importance/Correlation with CAD OAS')
        plt.tight_layout()
        plt.savefig('analysis_outputs/feature_importance.png')
        plt.close()

        # Print top 5 most important features
        print("\nTop 5 Most Important Features:")
        print(importance.sort_values('Importance', ascending=False).head())

        # Signed correlations with the target, strongest positive first
        correlations = target_corr.sort_values(ascending=False)
        print("\nCorrelations with CAD OAS:")
        print(correlations)

        # Create scatter plots for top 3 features
        top_features = importance.nlargest(3, 'Importance')['Feature'].tolist()
        fig, axes = plt.subplots(1, 3, figsize=(15, 5))

        for i, feature in enumerate(top_features):
            sns.scatterplot(data=data, x=feature, y='cad_oas', ax=axes[i])
            axes[i].set_title(f'CAD OAS vs {feature}')

        plt.tight_layout()
        plt.savefig('analysis_outputs/top_features_scatter.png')
        plt.close()

        # Save importance and correlations to CSV
        importance.to_csv('analysis_outputs/feature_importance.csv')
        correlations.to_frame('correlation').to_csv('analysis_outputs/feature_correlations.csv')

        return importance

    except Exception as e:
        logging.error(f"Error analyzing feature importance: {str(e)}")
        raise

def analyze_model_performance(model, data):
    """
    Analyze model performance metrics and make predictions

    Prints the table currently held by ``pull()``, forecasts one step, saves
    the forecast to ``csv_outputs/cad_oas_predictions.csv`` and a chart of
    the last 12 observations plus the prediction to
    ``analysis_outputs/prediction_plot.png``. Both folders are relative to
    the working directory and are created if missing, so the function can be
    called on its own.

    Args:
        model: Trained model from PyCaret
        data: Training data DataFrame (must contain ``cad_oas``)

    Raises:
        Exception: Any error is logged and re-raised unchanged.
    """
    try:
        # Get performance metrics
        metrics = pull()
        print("\nModel Performance Metrics:")
        print(metrics)

        # Get exogenous variables for prediction
        # NOTE(review): this is the last observed row, i.e. dated at the
        # final timestamp in `data` rather than at the forecast month -
        # confirm this is the intended stand-in for next month's values.
        exog_data = data.drop('cad_oas', axis=1).iloc[-1:].copy()

        # Make predictions
        logging.info("Making predictions")
        predictions = predict_model(model, X=exog_data)

        # Save predictions
        os.makedirs('csv_outputs', exist_ok=True)
        csv_path = 'csv_outputs/cad_oas_predictions.csv'
        predictions.to_csv(csv_path)
        logging.info(f"Predictions saved to {os.path.abspath(csv_path)}")

        # Get last actual and predicted values
        last_actual = data['cad_oas'].iloc[-1]
        # Get the prediction column (should be the last column)
        pred_col = predictions.columns[-1]
        next_pred = predictions[pred_col].iloc[-1]

        print("\nPrediction Results:")
        print(f"Last actual value: {last_actual:.4f}")
        print(f"Predicted next value: {next_pred:.4f}")

        # Ensure the plot folder exists here as well; previously this relied
        # on analyze_feature_importance having created it earlier.
        os.makedirs('analysis_outputs', exist_ok=True)

        # Plot actual vs predicted
        plt.figure(figsize=(12, 6))
        plt.plot(data.index[-12:], data['cad_oas'].tail(12), label='Actual', marker='o')
        plt.axhline(y=next_pred, color='r', linestyle='--', label='Prediction')
        plt.title('CAD OAS - Last 12 Months and Prediction')
        plt.legend()
        plt.tight_layout()
        plt.savefig('analysis_outputs/prediction_plot.png')
        plt.close()

    except Exception as e:
        logging.error(f"Error analyzing model performance: {str(e)}")
        raise

def main(file_path='c:/Users/Eddy/Documents/auto_ml/csv_outputs/monthly_oas_pycaret.csv'):
    """
    Main function to orchestrate the model training and prediction process

    Args:
        file_path: CSV file to load. Defaults to the location this notebook
            originally hard-coded, so calling ``main()`` with no arguments
            behaves as before; pass another path to run on a different
            machine or file.

    Raises:
        Exception: Any error from the pipeline steps is logged and re-raised
            unchanged.
    """
    try:
        # Load and prepare data
        data = load_and_prepare_data(file_path)

        # Train model; the returned metrics table is not used here
        model, _metrics = train_predict_model(data)

        # Analyze feature importance (writes its plots/CSVs as a side effect;
        # the returned table is not used here)
        analyze_feature_importance(model, data)

        # Analyze model performance
        analyze_model_performance(model, data)

    except Exception as e:
        logging.error(f"Error in main execution: {str(e)}")
        raise

# Run the pipeline when this code executes as the top-level module.
if __name__ == "__main__":
    main()

2025-01-25 20:02:00,785 - INFO - Loading data from c:/Users/Eddy/Documents/auto_ml/csv_outputs/monthly_oas_pycaret.csv
2025-01-25 20:02:00,791 - INFO - Data loaded successfully. Shape: (257, 15)
2025-01-25 20:02:00,791 - INFO - Initializing model setup


Unnamed: 0,Description,Value
0,session_id,123
1,Target,cad_oas
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(257, 15)"
5,Transformed data shape,"(257, 15)"
6,Transformed train set shape,"(256, 15)"
7,Transformed test set shape,"(1, 15)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


2025-01-25 20:02:01,137 - INFO - Training and comparing models


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.2917,0.1638,2.5009,2.5009,0.0243,0.0237,0.05
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.3594,0.2018,3.0829,3.0829,0.0292,0.0286,0.0267
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.3619,0.2032,3.1046,3.1046,0.0295,0.0289,0.0233
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.3884,0.218,3.3318,3.3318,0.0315,0.0309,0.0233
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.5248,0.2947,4.5025,4.5025,0.0427,0.0415,0.1867
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.5449,0.306,4.6738,4.6738,0.0447,0.0433,0.0633
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.6062,0.3402,5.2026,5.2026,0.0479,0.0467,0.0567
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.6357,0.3568,5.4576,5.4576,0.0493,0.0508,0.0233
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.6809,0.3822,5.8424,5.8424,0.0545,0.0529,0.0333
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,0.7633,0.4284,6.5509,6.5509,0.0604,0.0586,0.82


2025-01-25 20:02:08,571 - INFO - Getting model performance metrics
2025-01-25 20:02:08,571 - INFO - Finalizing best model



Top 5 Most Important Features:
       Feature  Importance
1    us_ig_oas    0.885205
0    us_hy_oas    0.873249
13     tsx_12m    0.746836
2          vix    0.729640
6   us_lei_yoy    0.691128

Correlations with CAD OAS:
cad_oas                   1.000000
us_ig_oas                 0.885205
us_hy_oas                 0.873249
vix                       0.729640
us_3m_10y                 0.156175
us_inflation_surprises    0.069191
us_hard_data_surprises   -0.187111
tsx_1m                   -0.195322
us_growth_surprises      -0.292817
tsx_3m                   -0.370713
tsx_6m                   -0.554559
us_equity_revisions      -0.629549
us_economic_regime       -0.663500
us_lei_yoy               -0.691128
tsx_12m                  -0.746836
Name: cad_oas, dtype: float64


2025-01-25 20:02:09,381 - INFO - Making predictions



Model Performance Metrics:
                                                             Model    MASE  \
ada_cds_dt            AdaBoost w/ Cond. Deseasonalize & Detrending  0.2917   
br_cds_dt        Bayesian Ridge w/ Cond. Deseasonalize & Detren...  0.3594   
en_cds_dt          Elastic Net w/ Cond. Deseasonalize & Detrending  0.3619   
lasso_cds_dt             Lasso w/ Cond. Deseasonalize & Detrending  0.3884   
lightgbm_cds_dt  Light Gradient Boosting w/ Cond. Deseasonalize...  0.5248   
gbr_cds_dt       Gradient Boosting w/ Cond. Deseasonalize & Det...  0.5449   
et_cds_dt          Extra Trees w/ Cond. Deseasonalize & Detrending  0.6062   
omp_cds_dt       Orthogonal Matching Pursuit w/ Cond. Deseasona...  0.6357   
huber_cds_dt             Huber w/ Cond. Deseasonalize & Detrending  0.6809   
catboost_cds_dt  CatBoost Regressor w/ Cond. Deseasonalize & De...  0.7633   
dt_cds_dt        Decision Tree w/ Cond. Deseasonalize & Detrending  0.7845   
rf_cds_dt        Random Forest w/ Co

2025-01-25 20:02:09,573 - INFO - Predictions saved to c:\Users\Eddy\Documents\auto_ml\notebooks\csv_outputs\cad_oas_predictions.csv



Prediction Results:
Last actual value: 98.5117
Predicted next value: 101.9598
