In [1]:
import vectorbt as vbt
import numpy as np
import pandas as pd
import datetime
import plotly.express as px
from xbbg import blp
import os
import quantstats as qs
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import logging

# Import custom modules with an alias
import bloomberg_data as bd
import transformations as tr
import visuals as vis

In [13]:
# Main data retrieval and merging process.
#
# Every series is declared once as a (Bloomberg ticker, output column) pair so the
# ticker, field and column-name lists can never drift out of step; zip() silently
# truncates to the shortest input, which would hide such a mismatch.
series_spec = [
    ('.MIDERCAD U Index', 'cad_ig_er_index'),
    ('.CADIG F Index', 'cad_ig_sprds'),
    ('VIX Index', 'vix'),
    ('.HYUSER U Index', 'us_hy_er_index'),
    ('.IGUSER U Index', 'us_ig_er_index'),
    ('SPTSX INDEX', 'tsx_index'),
    ('ECSURPUS Index', 'us_eco_suprise'),
    ('LEI YOY Index', 'lei_yoy_index'),
    ('ECRPUS 1Y Index', 'us_recession_odds'),
    ('ECRPCA 1Y Index', 'cad_recession_odds'),
    ('GDGCAFJP Index', 'atlanta_fed'),
    # NOTE(review): same ticker as 'atlanta_fed' above, so the two columns come out
    # identical. Presumably a different ticker was intended here -- TODO confirm.
    ('GDGCAFJP Index', 'growth_surpise'),
    ('.HARDATA G Index', 'hard_data'),
    ('CGERGLOB Index', 'equity_revisions'),
    ('.FRCRRM G Index', 'fed_credit_model'),
]

tickers = [ticker_name for ticker_name, _ in series_spec]
column_names = [[column_name] for _, column_name in series_spec]
fields = [['PX_LAST'] for _ in tickers]  # one field list per ticker
start_date = '2006-01-01'
end_date = '2025-12-31'
frequency = 'd'  # Single frequency for all tickers

# Surface accidental repeats at run time instead of letting them pass silently.
duplicate_tickers = sorted({name for name in tickers if tickers.count(name) > 1})
if duplicate_tickers:
    logging.warning(f"Duplicate tickers requested: {duplicate_tickers}")

dataframes = []

for ticker, field, col_name in zip(tickers, fields, column_names):
    df = bd.get_single_ticker_data(ticker, field, start_date, end_date, freq=frequency, column_names=col_name)
    dataframes.append(df)
    logging.info(f"Data for {ticker}:")
    logging.info(df.head())  # Log the first few rows of each dataframe

# Merge all dataframes
merged_data = bd.merge_dataframes(dataframes, method='outer')

# Log the final merged data and show its summary
logging.info("Merged data head:")
logging.info(merged_data.head())
logging.info('----------------------------------------------------------------')
logging.info('----------------------------------------------------------------')
logging.info(merged_data.tail())
# DataFrame.info() prints its summary itself and returns None, so wrapping it in
# logging.info() only added a stray "None" log record; call it directly.
merged_data.info()

# Name the index "Date" and turn it into a regular column for the CSV export
merged_data.index.name = 'Date'
csv_data = merged_data.reset_index()

# Save the dataframe to a CSV file (create the output folder on a fresh checkout)
os.makedirs('Outputs', exist_ok=True)
csv_data.to_csv('Outputs/csv_data.csv', index=False)

# Rename for further use (same object as merged_data, not a copy)
data = merged_data

2024-09-16 14:00:55,257 - INFO - Retrieving data for ticker: .MIDERCAD U Index with frequency: DAILY
2024-09-16 14:00:55,641 - INFO - Retrieved data shape for .MIDERCAD U Index: (2094, 1)
2024-09-16 14:00:55,641 - INFO - Cleaned data shape for .MIDERCAD U Index: (2094, 1)
2024-09-16 14:00:55,641 - INFO - Successfully retrieved data for ticker: .MIDERCAD U Index
2024-09-16 14:00:55,641 - INFO - Data for .MIDERCAD U Index:
2024-09-16 14:00:55,641 - INFO -             cad_ig_er_index
2006-01-31           1.0579
2006-02-28           1.0576
2006-03-31           1.0569
2006-04-28           1.0561
2006-05-31           1.0565
2024-09-16 14:00:55,641 - INFO - Retrieving data for ticker: .CADIG F Index with frequency: DAILY
2024-09-16 14:00:56,316 - INFO - Retrieved data shape for .CADIG F Index: (4662, 1)
2024-09-16 14:00:56,317 - INFO - Cleaned data shape for .CADIG F Index: (4662, 1)
2024-09-16 14:00:56,317 - INFO - Successfully retrieved data for ticker: .CADIG F Index
2024-09-16 14:00:56,31

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4967 entries, 2006-01-01 to 2024-09-16
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   cad_ig_er_index     4967 non-null   float64
 1   cad_ig_sprds        4967 non-null   float64
 2   vix                 4967 non-null   float64
 3   us_hy_er_index      4967 non-null   float64
 4   us_ig_er_index      4967 non-null   float64
 5   tsx_index           4967 non-null   float64
 6   us_eco_suprise      4967 non-null   float64
 7   lei_yoy_index       4967 non-null   float64
 8   us_recession_odds   4967 non-null   float64
 9   cad_recession_odds  4967 non-null   float64
 10  atlanta_fed         4967 non-null   float64
 11  growth_surpise      4967 non-null   float64
 12  hard_data           4967 non-null   float64
 13  equity_revisions    4967 non-null   float64
 14  fed_credit_model    4967 non-null   float64
dtypes: float64(15)
memory usage: 620.9 KB

In [14]:
# Show the most recent rows of the merged frame (explicit default row count).
data.tail(n=5)

Unnamed: 0_level_0,cad_ig_er_index,cad_ig_sprds,vix,us_hy_er_index,us_ig_er_index,tsx_index,us_eco_suprise,lei_yoy_index,us_recession_odds,cad_recession_odds,atlanta_fed,growth_surpise,hard_data,equity_revisions,fed_credit_model
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-09-10,1.39,124.1311,19.08,1.1115,1.4251,23003.09,-0.388,-5.2,30.0,25.0,2.471,2.471,-0.2231,-0.22,0.18
2024-09-11,1.3903,124.0589,17.69,1.1122,1.4257,23211.17,-0.388,-5.2,30.0,25.0,2.471,2.471,-0.2231,-0.22,0.18
2024-09-12,1.3906,123.8093,17.07,1.1151,1.4271,23475.14,-0.391,-5.2,30.0,25.0,2.471,2.471,-0.2264,-0.22,0.18
2024-09-13,1.3911,123.0405,16.56,1.1154,1.4282,23568.65,-0.387,-5.2,30.0,25.0,2.471,2.471,-0.2264,-0.31,0.18
2024-09-16,1.3911,123.0405,17.06,1.1154,1.4282,23674.85,-0.329,-5.2,30.0,25.0,2.471,2.471,-0.2264,-0.31,0.18


In [15]:
# Show the earliest rows of the merged frame (explicit default row count).
data.head(n=5)

Unnamed: 0_level_0,cad_ig_er_index,cad_ig_sprds,vix,us_hy_er_index,us_ig_er_index,tsx_index,us_eco_suprise,lei_yoy_index,us_recession_odds,cad_recession_odds,atlanta_fed,growth_surpise,hard_data,equity_revisions,fed_credit_model
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2006-01-01,1.0579,58.1482,11.14,0.5597,1.0864,11441.58,0.19,3.6,70.0,10.0,0.7,0.7,-0.0165,0.18,0.11
2006-01-03,1.0579,58.1482,11.14,0.5597,1.0864,11441.58,0.19,3.6,70.0,10.0,0.7,0.7,-0.0165,0.18,0.11
2006-01-04,1.0579,57.9397,11.37,0.5607,1.0867,11501.48,0.2,3.6,70.0,10.0,0.7,0.7,-0.0165,0.18,0.11
2006-01-05,1.0579,57.7164,11.31,0.5619,1.0869,11507.68,0.175,3.6,70.0,10.0,0.7,0.7,-0.0165,0.18,0.11
2006-01-06,1.0579,57.2861,11.0,0.564,1.0869,11620.46,0.192,3.6,70.0,10.0,0.7,0.7,-0.0165,0.18,0.11


In [16]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Stacked line charts: one panel per column of `data`, all sharing the x axis.
PANEL_HEIGHT_PX = 300   # height allotted to each panel
FIGURE_WIDTH_PX = 900

# make_subplots documents subplot_titles as a list of strings; hand it a plain
# list rather than a pandas Index, whose truth value is ambiguous if the library
# tests it with `if not subplot_titles`.
series_names = list(data.columns)

# Create subplots
fig = make_subplots(
    rows=len(series_names),
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.02,
    subplot_titles=series_names,
)

# Add a line chart for each column (plotly rows are 1-based)
for i, col in enumerate(series_names):
    fig.add_trace(
        go.Scatter(x=data.index, y=data[col], mode='lines', name=col),
        row=i + 1, col=1
    )

# Scale the figure height with the number of panels; the legend is redundant
# because every panel already carries its column name as a title.
fig.update_layout(
    height=PANEL_HEIGHT_PX * len(series_names),
    width=FIGURE_WIDTH_PX,
    title_text="Time Series Line Charts for All Columns in DataFrame",
    showlegend=False,
)

# Show the figure
fig.show()



In [19]:

# Build the feature frame on a copy so the original `data` frame is left untouched.
data_transformations = data.copy()

# Add 1/3/6-period percentage changes for each of the series below.
# NOTE(review): pct_change(periods=n) works on rows, and the data was requested
# with frequency 'd', so these look like 1/3/6-row (daily) changes despite the
# `_1m/_3m/_6m` suffix. The column names are kept as-is because other cells may
# reference them -- TODO confirm whether monthly horizons were intended.
pct_change_sources = ['cad_ig_sprds', 'us_hy_er_index', 'us_ig_er_index', 'tsx_index']
pct_change_periods = [1, 3, 6]

for source_col in pct_change_sources:
    for n_periods in pct_change_periods:
        data_transformations[f'{source_col}_{n_periods}m_pct_change'] = (
            data_transformations[source_col].pct_change(periods=n_periods)
        )

# Drop the raw level columns us_hy_er_index, us_ig_er_index, and tsx_index
# (cad_ig_sprds is kept), then drop the leading rows that have no look-back
# history for the percentage changes.
columns_to_drop = ['us_hy_er_index', 'us_ig_er_index', 'tsx_index']
data_transformations = data_transformations.drop(columns=columns_to_drop)
data_transformations = data_transformations.dropna()

# Turn the index into a regular column for the CSV export
data_transformations_csv = data_transformations.reset_index()

# Save the dataframe to a CSV file (create the output folder on a fresh checkout)
os.makedirs('Outputs', exist_ok=True)
data_transformations_csv.to_csv('Outputs/data_transformations.csv', index=False)

# Print information about the new DataFrame
data_transformations.info()

# Created/Modified files during execution:
#   Outputs/data_transformations.csv

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4961 entries, 2006-01-10 to 2024-09-16
Data columns (total 24 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   cad_ig_er_index               4961 non-null   float64
 1   cad_ig_sprds                  4961 non-null   float64
 2   vix                           4961 non-null   float64
 3   us_eco_suprise                4961 non-null   float64
 4   lei_yoy_index                 4961 non-null   float64
 5   us_recession_odds             4961 non-null   float64
 6   cad_recession_odds            4961 non-null   float64
 7   atlanta_fed                   4961 non-null   float64
 8   growth_surpise                4961 non-null   float64
 9   hard_data                     4961 non-null   float64
 10  equity_revisions              4961 non-null   float64
 11  fed_credit_model              4961 non-null   float64
 12  cad_ig_sprds_1m_pct_change    4961 non-null 