In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from pathlib import Path
import datetime
from joblib import dump, load
import joblib

In [None]:
# Resource for saving and loading sklearn models:  https://mljar.com/blog/save-load-random-forest/

In [2]:
# Feature Set
# ICE BofA US High Yield Index Option-Adjusted Spread (BAMLH0A0HYM2)
# ICE BofA US Corporate Index Option-Adjusted Spread (BAMLC0A0CM)
# ICE BofA BBB US Corporate Index Option-Adjusted Spread (BAMLC0A4CBBB)
# ICE BofA BB US High Yield Index Option-Adjusted Spread (BAMLH0A1HYBB)
# ICE BofA CCC & Lower US High Yield Index Option-Adjusted Spread (BAMLH0A3HYC)

In [3]:
# Stamp this run with the current local date as a 'YYYY-MM-DD' string.
# Per the original note, the value is consumed when the data frame is built further down.
run_date = format(datetime.datetime.now(), '%Y-%m-%d')

In [4]:
# Import the feature set data and construct the data frame.
# The CSV's "Date" column becomes the index and is parsed to datetimes.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (format inference is the default behaviour) and the resulting
# DeprecationWarning was only hidden by the global warnings filter above.
feature_set_pct_path = Path('AutoOutputFiles/df_key_credit_data_usa_adjusted_pct.csv')
df_feature_set = pd.read_csv(feature_set_pct_path, index_col="Date", parse_dates=True)

# Sanity check: show the most recent rows that were loaded
print('Feature set on import:\n')
df_feature_set.tail()

Feature set on import:



Unnamed: 0_level_0,BAMLH0A0HYM2,BAMLC0A0CM,BAMLC0A4CBBB,BAMLH0A1HYBB,BAMLH0A3HYC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-12,0.025,0.011111,0.009009,0.036036,0.013595
2021-10-13,-0.009146,0.0,0.0,-0.008696,-0.010432
2021-10-14,-0.021538,-0.010989,0.0,-0.017544,-0.019578
2021-10-15,-0.018868,-0.011111,-0.017857,-0.035714,0.001536
2021-10-18,-0.003205,0.0,0.0,0.0,-0.004601


In [5]:
# Number of periods the feature set is shifted by for the Optimal Model;
# also embedded in the file name of the saved model that gets loaded.
optimal_model_lag: int = 30

In [6]:
# Report which lag is being applied (informational output only).
print(
    'The feature set is ready for forward testing',
    f'The feature set was shifted by {optimal_model_lag} days:\n',
    f'Stated differently, we are using the T-minus {optimal_model_lag} day feature set variable values for predictive purposes:\n',
    sep='\n',
)

# Columns passed to the model, in this order.
X_variables = ['BAMLH0A0HYM2', 'BAMLC0A0CM', 'BAMLC0A4CBBB', 'BAMLH0A1HYBB', 'BAMLH0A3HYC']

# Shift every feature value down by `optimal_model_lag` rows (the index itself is
# unchanged), then drop any row containing a NaN -- this removes the leading rows
# emptied by the shift as well as any row that already had a missing value.
# NOTE(review): a later cell's comment says the feature set "was not shifted";
# confirm that shifting here is what forward testing requires.
X = (
    df_feature_set.loc[:, X_variables]
    .shift(periods=optimal_model_lag)
    .dropna()
)
X

The feature set is ready for forward testing
The feature set was shifted by 30 days:

Stated differently, we are using the T-minus 30 day feature set variable values for predictive purposes:



Unnamed: 0_level_0,BAMLH0A0HYM2,BAMLC0A0CM,BAMLC0A4CBBB,BAMLH0A1HYBB,BAMLH0A3HYC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1997-02-13,-0.022364,0.000000,0.000000,-0.015385,-0.013550
1997-02-14,0.009804,0.016667,0.000000,0.010417,0.012363
1997-02-18,0.003236,0.000000,0.024096,0.000000,-0.001357
1997-02-19,0.000000,0.000000,-0.011765,-0.005155,0.000000
1997-02-20,-0.009677,-0.016393,-0.011905,-0.010363,-0.002717
...,...,...,...,...,...
2021-10-12,0.015823,0.000000,0.000000,0.009132,0.039370
2021-10-13,-0.006231,0.000000,0.000000,-0.013575,-0.009091
2021-10-14,-0.009404,0.000000,0.000000,-0.004587,0.003058
2021-10-15,-0.006329,0.000000,-0.008772,-0.009217,-0.012195


In [7]:
# Version date of the finalized models: the last in-sample testing date
# on which the models were finalized. Format: YYYY-MM-DD
finalized_model_period_end_date = '2021-10-15'

# Assemble the relative path of the saved optimal lagged model from the
# lag and the version date.
optimal_model_fl_path = (
    'algo_optimal_parameters/back_test_using_mean_grid_values/'
    f'Lag_{optimal_model_lag}_random_forest_{finalized_model_period_end_date}.joblib'
)

print(optimal_model_fl_path)

algo_optimal_parameters/back_test_using_mean_grid_values/Lag_30_random_forest_2021-10-15.joblib


In [8]:
# Restore the previously saved model from disk; the returned object is used
# as-is, so nothing needs to be instantiated beforehand.
# NOTE(review): joblib files are pickle-based -- only load artifacts from a trusted source.
loaded_rfc = load(optimal_model_fl_path)

In [9]:
# Run the loaded model over the lagged feature set to predict the target (equity)
predictions = loaded_rfc.predict(X)

# Show how many predictions were produced (one per row of X).
# The following cell's output shows `predictions` as a numpy.ndarray with dtype=int64.
predictions.shape[0]

6428

In [10]:
# Display the raw prediction array returned by the model
predictions

array([1, 1, 1, ..., 0, 1, 1], dtype=int64)

In [11]:
# Convert "predictions" from a numpy.ndarray to a pandas Series (per the original
# note, it is used to construct a data frame further down). The Series keeps the
# default 0..n-1 integer index, so rows are identified by position, not by date.
# NOTE(review): X was built with .shift(optimal_model_lag) in an earlier cell, so
# the earlier remark here that "the feature set was not shifted" does not match the
# code -- confirm which date each predicted value is meant to apply to.
all_new_predictions = pd.Series(predictions)

# NB:  When using October 18, 2021 as the first forward test date,
#      the initial forecasts (one per day of the lag window) are available immediately.
#      On subsequent runs, when new feature data is available, we are only interested in the last forecast in the series.
#      E.g. when the next day's feature set data is available, run the notebook; you are then interested in the last value in the series.
#           This represents the forecast for the target one lag window on from the new period end date, etc.
print(f'We are only concerned with the prediction found at index {len(predictions) - 1}')

# Keep the trailing window of forecasts. Its length follows `optimal_model_lag`
# rather than a hard-coded 30, so it stays correct if the lag is changed.
new_predictions = all_new_predictions.tail(optimal_model_lag)
new_predictions

We are only concerned with the prediction found at index 6427


6398    1
6399    1
6400    1
6401    1
6402    1
6403    0
6404    1
6405    1
6406    1
6407    1
6408    0
6409    1
6410    0
6411    0
6412    1
6413    1
6414    0
6415    1
6416    1
6417    0
6418    0
6419    0
6420    0
6421    1
6422    1
6423    1
6424    1
6425    0
6426    1
6427    1
dtype: int64