In [102]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from pathlib import Path
import time
import datetime
import hvplot.pandas

#Import SKLearn Library and CLasses
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.datasets import make_classification
from sklearn import metrics

from joblib import dump, load
import joblib

import yfinance as yf
import pyfolio as pf
from pyfolio import timeseries 
import matplotlib.pyplot as plt
import empyrical
%matplotlib inline

In [103]:
# Feature Set
# ICE BofA US High Yield Index Option-Adjusted Spread (BAMLH0A0HYM2)
# ICE BofA US Corporate Index Option-Adjusted Spread (BAMLC0A0CM)
# ICE BofA BBB US Corporate Index Option-Adjusted Spread (BAMLC0A4CBBB)
# ICE BofA BB US High Yield Index Option-Adjusted Spread (BAMLH0A1HYBB)
# ICE BofA CCC & Lower US High Yield Index Option-Adjusted Spread (BAMLH0A3HYC)

In [104]:
# Import feature set data and construct the data frame
feature_set_pct_path = Path('AutoOutputFiles/df_key_credit_data_usa_adjusted_pct.csv')
df_feature_set = pd.read_csv(feature_set_pct_path, index_col="Date", infer_datetime_format=True, parse_dates=True)
# Uncomment if required for testing
print(f'Feature set on import:\n')
df_feature_set.tail()

Feature set on import:



Unnamed: 0_level_0,BAMLH0A0HYM2,BAMLC0A0CM,BAMLC0A4CBBB,BAMLH0A1HYBB,BAMLH0A3HYC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-11,0.0,0.0,0.0,-0.004484,-0.001508
2021-10-12,0.025,0.011111,0.009009,0.036036,0.013595
2021-10-13,-0.009146,0.0,0.0,-0.008696,-0.010432
2021-10-14,-0.021538,-0.010989,0.0,-0.017544,-0.019578
2021-10-15,-0.018868,-0.011111,-0.017857,-0.035714,0.001536


In [105]:
# Construct an empty data frame where the Date column = index and only contains forward test dates
# In this instance of the test, Dates begin on October 18, 2021 (First forward test date)
# This data frame will be used to hold the future predictions of the model

# NYSE business holiday calendar for reference: https://www.nyse.com/markets/hours-calendars

# US Equity Calendar
# This calendar must contain Dates for n-days in the future, beginning from October 18, 2021 (the date of the forward test start)
# Prior to subsequent runs of this notebook, ensure the next required forward date in manually updated in the csv noted in the following "Path" function
us_equity_calendar = Path('ManualFiles/calendar_usa_equity_trading_days_lag.csv')


df_us_equity_calendar = pd.read_csv(us_equity_calendar, index_col="Date", infer_datetime_format=True, parse_dates=True)
# Uncomment if required for testing
print(f'USA Equity Trading Days Calendar (Manually Constructed):\n')
df_us_equity_calendar.head(), df_us_equity_calendar.tail()

USA Equity Trading Days Calendar (Manually Constructed):



(Empty DataFrame
 Columns: []
 Index: [2021-10-18 00:00:00, 2021-10-25 00:00:00, 2021-11-01 00:00:00, 2021-11-08 00:00:00, 2021-11-15 00:00:00],
 Empty DataFrame
 Columns: []
 Index: [2021-10-22 00:00:00, 2021-10-29 00:00:00, 2021-11-05 00:00:00, 2021-11-12 00:00:00, 2021-11-19 00:00:00])

In [106]:
target_set_levels_path = Path('AutoOutputFiles/df_equity_data.csv')
equity_data = pd.read_csv(target_set_levels_path, index_col="Date", infer_datetime_format=True, parse_dates=True)
# Uncomment if required for testing
print(f'\nTarget set on import:\n')
equity_data.tail()


Target set on import:



Unnamed: 0_level_0,Close,EquityPriceReturns,PositiveReturn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-10-11,434.690002,-0.00724,0
2021-10-12,433.619995,-0.002462,0
2021-10-13,435.179993,0.003598,1
2021-10-14,442.5,0.016821,1
2021-10-15,445.869995,0.007616,1


In [107]:
# Feature set optimal lag
# If additional test conclude that a different lag is optimal, update the following variable value = optimal lag number
lag = 30

# In this version, do not shift the feature set
# The lag value represents all future predictions (one benefit of a large lag value)
# e.g. lag = 36 predicts 36 daily values in the future (future version will compare how the predictions change when running daily)
# i.e. Run on October 21, 2021 leads to 36 future daily predictions, Run on October 22, 2021 leads to the next 36 predictions
#      Do the 35 predictions that overlap stay the same, change, what are the descriptive statistics for this data?

In [108]:
print('The feature set is ready for forward testing')
print(f'The feature set is NOT shifted and the predictions that follow represent "n Days" in the future predictions')
print(f'For example:  If lag = 5, then prediction for 5 days in the future')

X_variables = ['BAMLH0A0HYM2', 'BAMLC0A0CM', 'BAMLC0A4CBBB', 'BAMLH0A1HYBB', 'BAMLH0A3HYC']
X = df_feature_set[X_variables]
X

The feature set is ready for forward testing
The feature set is NOT shifted and the predictions that follow represent "n Days" in the future predictions
For example:  If lag = 5, then prediction for 5 days in the future


Unnamed: 0_level_0,BAMLH0A0HYM2,BAMLC0A0CM,BAMLC0A4CBBB,BAMLH0A1HYBB,BAMLH0A3HYC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1997-01-02,-0.022364,0.000000,0.000000,-0.015385,-0.013550
1997-01-03,0.009804,0.016667,0.000000,0.010417,0.012363
1997-01-06,0.003236,0.000000,0.024096,0.000000,-0.001357
1997-01-07,0.000000,0.000000,-0.011765,-0.005155,0.000000
1997-01-08,-0.009677,-0.016393,-0.011905,-0.010363,-0.002717
...,...,...,...,...,...
2021-10-11,0.000000,0.000000,0.000000,-0.004484,-0.001508
2021-10-12,0.025000,0.011111,0.009009,0.036036,0.013595
2021-10-13,-0.009146,0.000000,0.000000,-0.008696,-0.010432
2021-10-14,-0.021538,-0.010989,0.000000,-0.017544,-0.019578


In [109]:
# Construct the file name to use to load the optimal lagged model
#fl_nm = 'model_candidates/Lag_' + str(i) + '_random_forest_' + testing_end + '.joblib'

# date format = YYYY-MM-DD
finalized_model_period_end_date = '2021-10-15'

fl_path = 'algo_optimal_parameters/back_test_using_mean_grid_values/Lag_' + str(lag) + '_random_forest_' + finalized_model_period_end_date + '.joblib'
print(fl_path)

algo_optimal_parameters/back_test_using_mean_grid_values/Lag_30_random_forest_2021-10-15.joblib


In [110]:
# load, no need to initialize the loaded_rf
loaded_rfc = joblib.load(fl_path)

In [111]:
# Make predictions for the target (equity)
predictions = loaded_rfc.predict(X)
predictions

# Uncomment for length and data type for predictions
# "predictions" should result equal a numpy.ndarray with dtype=int64
#len(predictions)
#type(predictions)

array([1, 1, 1, ..., 0, 1, 1], dtype=int64)

In [117]:
# convert "predictions" data type from a numpy.ndarray to a pandas series (will be used in construction of data frame below)
# each predicted value is for 30 days into the future, as the feature set was not shifted
all_new_predictions = pd.Series(predictions)
all_new_predictions

# We only want the next n-lag number of future predictions beginning forward test start date
# Slice the pd series for the last n-lag predictions
new_predictions = all_new_predictions.iloc[-lag:]
new_predictions

6427    1
6428    1
6429    0
6430    1
6431    0
6432    1
6433    1
6434    0
6435    1
6436    0
6437    1
6438    0
6439    0
6440    1
6441    0
6442    0
6443    1
6444    1
6445    1
6446    1
6447    0
6448    0
6449    0
6450    1
6451    1
6452    1
6453    1
6454    0
6455    1
6456    1
dtype: int64

In [118]:
# Instantiate data frame using the index from df_us_equity_calendar
# df_us_equity_calendar contains the dates we want to hold the "new_predictions" data
df_new_predictions = pd.DataFrame(index=df_us_equity_calendar.index)
df_new_predictions

2021-10-18
2021-10-25
2021-11-01
2021-11-08
2021-11-15
2021-11-22
2021-11-29
2021-10-19
2021-10-26
2021-11-02
2021-11-09


In [115]:
# Reference for the best method to add the values of a Series as a new column of a DataFrame: 
# https://stackoverflow.com/questions/12555323/adding-new-column-to-existing-dataframe-in-python-pandas

# Example code
# 'e' in the following is the column name to assign 
#df1 = df1.assign(e=pd.Series(np.random.randn(sLength)).values)

# Add the panda series that contains the prdictions
# NB:  We will always need to know the T+2 prediction when entering or exiting the equity position, as the order has to be placed on the prior day as a Market-on-Close (MOC) order
#      i.e. the prior MOC order allows for the capture on the next day's equity performance (or on position exit, avoiding the next day's equity performance)
#      Have to retain the T+1 predicted value during the forward test

# for now, run this daily, beginning on the first forward test date, drop the first prediction, and the test actually begins the next day
# manually enter into this list, as it is unclear if predictions will change when run on subsequent days
# e.g. when running with period end date = Friday, October 15, 2021, manually enter the predicted value that appears on the next trading day (Monday, October 18, 2021)

# The list will be used later
actual_forward_test_results = [1]

# The resulting data frame represnets 
df_new_predictions = df_new_predictions.assign(ForwardTestPredictions=pd.Series(new_predictions).values)
df_new_predictions

Unnamed: 0_level_0,ForwardTestPredictions
Date,Unnamed: 1_level_1
2021-10-18,1
2021-10-25,1
2021-11-01,0
2021-11-08,1
2021-11-15,0
2021-11-22,1
2021-11-29,1
2021-10-19,0
2021-10-26,1
2021-11-02,0


In [101]:
equity_data.tail(n=lag)

Unnamed: 0_level_0,Close,EquityPriceReturns,PositiveReturn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-09-03,451.633118,-0.000243,0
2021-09-07,450.01828,-0.003576,0
2021-09-08,449.470062,-0.001218,0
2021-09-09,447.546234,-0.00428,0
2021-09-10,444.017517,-0.007885,0
2021-09-13,445.15387,0.002559,1
2021-09-14,442.751587,-0.005397,0
2021-09-15,446.449738,0.008353,1
2021-09-16,445.742004,-0.001585,0
2021-09-17,441.399994,-0.009741,0


In [None]:
# Filter the equity_data data frame for only the required forward test dates
current_days_in_forward_test = 5
df_equity_data_forward_test = equity_data.iloc[-5:]
df_equity_data_forward_test.head()

In [None]:
# Only retain the forward test dates to allow the following code to calculate forward test cummulative return
#################  I HAVE TO FIGURE OUT A WAY TO CALCULATE THE NUMBER OF ROWS TO RETAIN; CURRENTLY HARD CODED
#################  ONLY WANT DATA AFTER OCTOBER 15, 2021
df_forward_test_performance_results = df_forward_test_performance_results.iloc[-4:]

df_forward_test_performance_results['Equity_Position'] = df_forward_test_performance_results['ForwardTestPredictions'] * df_forward_test_performance_results['EquityPriceReturns']
df_forward_test_performance_results['Strategy_Cum_Rtn'] = (1 + df_forward_test_performance_results['Equity_Position']).cumprod()
df_forward_test_performance_results['Equity_Cum_Rtn'] = (1 + df_forward_test_performance_results['EquityPriceReturns']).cumprod()


