In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from pathlib import Path
import datetime
from joblib import dump, load
import joblib

In [2]:
# Resource for saving and loading sklearn models:  https://mljar.com/blog/save-load-random-forest/

In [3]:
# Feature Set
# ICE BofA US High Yield Index Option-Adjusted Spread (BAMLH0A0HYM2)
# ICE BofA US Corporate Index Option-Adjusted Spread (BAMLC0A0CM)
# ICE BofA BBB US Corporate Index Option-Adjusted Spread (BAMLC0A4CBBB)
# ICE BofA BB US High Yield Index Option-Adjusted Spread (BAMLH0A1HYBB)
# ICE BofA CCC & Lower US High Yield Index Option-Adjusted Spread (BAMLH0A3HYC)

In [4]:
# Today's local date as a 'YYYY-MM-DD' string, captured once per run.
# Per the original note, it is included in a data frame built further down.
run_date = datetime.date.today().isoformat()

In [5]:
# Import the feature set (percentage-change data) and construct the data frame.
# The "Date" column is used as the index and pandas is asked to parse it as dates.
# `infer_datetime_format` is deliberately not passed: it is deprecated since
# pandas 2.0, where a strict version of that inference is already the default
# and the argument has no effect.
feature_set_pct_path = Path('AutoOutputFiles/df_key_credit_data_usa_adjusted_pct.csv')
df_feature_set = pd.read_csv(feature_set_pct_path, index_col="Date", parse_dates=True)

# Sanity check: show the most recent rows of the imported feature set
print('Feature set on import:\n')
df_feature_set.tail()

Feature set on import:



Unnamed: 0_level_0,BAMLH0A0HYM2,BAMLC0A0CM,BAMLC0A4CBBB,BAMLH0A1HYBB,BAMLH0A3HYC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-19,-0.006431,0.0,0.0,-0.013889,-0.003082
2021-10-20,-0.006472,0.0,0.0,-0.004695,-0.001546
2021-10-21,-0.013029,0.0,0.0,-0.023585,-0.004644
2021-10-22,0.013201,0.0,0.009091,0.014493,0.010886
2021-10-25,0.013029,0.0,-0.009009,0.019048,0.006154


In [6]:
# Feature set lag for the Naive Model: the number of rows the feature set is
# shifted by before prediction. The same value is embedded in the saved
# model's file name.
naive_model_lag = 27

In [7]:
# Report the lag that is about to be applied to the feature set
print(
    'The feature set is ready for forward testing',
    f'The feature set was shifted by {naive_model_lag} days:\n',
    f'Stated differently, we are using the T-minus {naive_model_lag} day feature set variable values for predictive purposes:\n',
    sep='\n',
)

# Columns fed to the model, in the order listed here
X_variables = [
    'BAMLH0A0HYM2',
    'BAMLC0A0CM',
    'BAMLC0A4CBBB',
    'BAMLH0A1HYBB',
    'BAMLH0A3HYC',
]

# Shift the selected columns down by the lag (in rows), then drop the rows
# left with missing values by the shift
X = (
    df_feature_set[X_variables]
    .shift(periods=naive_model_lag)
    .dropna()
)
X

The feature set is ready for forward testing
The feature set was shifted by 27 days:

Stated differently, we are using the T-minus 27 day feature set variable values for predictive purposes:



Unnamed: 0_level_0,BAMLH0A0HYM2,BAMLC0A0CM,BAMLC0A4CBBB,BAMLH0A1HYBB,BAMLH0A3HYC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1997-02-10,-0.022364,0.000000,0.000000,-0.015385,-0.013550
1997-02-11,0.009804,0.016667,0.000000,0.010417,0.012363
1997-02-12,0.003236,0.000000,0.024096,0.000000,-0.001357
1997-02-13,0.000000,0.000000,-0.011765,-0.005155,0.000000
1997-02-14,-0.009677,-0.016393,-0.011905,-0.010363,-0.002717
...,...,...,...,...,...
2021-10-19,-0.012698,-0.010870,-0.008850,-0.023148,0.000000
2021-10-20,-0.006431,-0.010989,0.000000,0.000000,-0.009202
2021-10-21,0.006472,0.000000,0.000000,0.009479,-0.001548
2021-10-22,-0.009646,0.000000,0.000000,-0.018779,-0.009302


In [8]:
# Last in-sample model version date, i.e. the final in-sample testing date at
# which the models were finalized. Format: YYYY-MM-DD.
finalized_model_period_end_date = '2021-10-15'

# TODO: the 'nieve' path segment is a known misspelling of 'naive'; it still
# needs to be corrected.
naive_model_fl_path = ''.join([
    'model_candidates/nieve/Lag_',
    str(naive_model_lag),
    '_random_forest_',
    finalized_model_period_end_date,
    '.joblib',
])

print(naive_model_fl_path)

model_candidates/nieve/Lag_27_random_forest_2021-10-15.joblib


In [9]:
# Load the persisted model object from disk; joblib.load returns the stored
# object directly, so no estimator has to be constructed beforehand.
# Presumably this is a fitted random forest, judging by the file name.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code, so only load model files from a trusted source.
loaded_rfc = joblib.load(naive_model_fl_path)

In [10]:
# Predict the target (equity) from the lagged feature set.
# The result is expected to be a numpy.ndarray with dtype=int64.
predictions = loaded_rfc.predict(X)

# Show how many predictions were produced
len(predictions)

6436

In [11]:
# Display the predictions array
predictions

array([1, 0, 1, ..., 0, 1, 0], dtype=int64)

In [12]:
# Convert "predictions" from a numpy.ndarray to a pandas Series (default
# integer index, in the same row order as X).
# X was built from the feature set shifted by `naive_model_lag` rows, so each
# prediction is driven by feature values observed that many rows earlier.
all_new_predictions = pd.Series(predictions)

print('We are only concerned with predictions from October 18, 2021 onward')

# Forward-test rows are those dated after the last in-sample date. Counting
# them from X's index, rather than hard-coding the number of rows, keeps this
# selection correct when the feature set file gains new rows.
# NOTE(review): assumes X is sorted by ascending date, so the forward-test
# rows are the trailing ones — confirm if the input file ordering can change.
forward_test_count = int((X.index > finalized_model_period_end_date).sum())

# Slice by an explicit start position so a count of zero yields an empty
# Series (a plain `iloc[-0:]` would return every row).
new_predictions = all_new_predictions.iloc[len(all_new_predictions) - forward_test_count:]
new_predictions

We are only concerned with predictions from October 18, 2021 onward


6430    1
6431    1
6432    1
6433    0
6434    1
6435    0
dtype: int64