# Random Forest Classifier Optimal Feature Lag, Model Parameters, & Strategy Performance Results

In [None]:
#### This notebook uses naive parameter settings (naive will be compared against grid search averages) over numerous feature set lags (1 - 90 days)
#### n_estimators = 500
#### max_depth = 5000
#### max_features = 5
#### min_samples_split = 2

#### This notebook uses RandomizedSearchCV to determine 'optimal' model parameters, which are then averaged 
#### The average parameter values across all lags are then used within notebook 'Current_rfc_model_algo_optimal_params_all_avg.ipynb' to determine if optimal is superior to naive

In [None]:
# Model Resource:    https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
#                    https://github.com/scikit-learn/scikit-learn/blob/844b4be24/sklearn/ensemble/_forest.py#L1064 (code)
# Pyfolio Resource:  https://www.quantrocket.com/codeload/quant-finance-lectures/quant_finance_lectures/Lecture33-Portfolio-Analysis-with-pyfolio.ipynb.html
# Understanding the shift can be confusing when analyzing results.  This reference does a good job explaining (index is not shifted, only the column values):  https://datascienceparichay.com/article/pandas-shift-column-values-up-or-down/
# https://mljar.com/blog/save-load-random-forest/

In [None]:
# Feature Set Variables
# ICE BofA US High Yield Index Option-Adjusted Spread (BAMLH0A0HYM2)
# ICE BofA US Corporate Index Option-Adjusted Spread (BAMLC0A0CM)
# ICE BofA BBB US Corporate Index Option-Adjusted Spread (BAMLC0A4CBBB)
# ICE BofA BB US High Yield Index Option-Adjusted Spread (BAMLH0A1HYBB)
# ICE BofA CCC & Lower US High Yield Index Option-Adjusted Spread (BAMLH0A3HYC)

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from pathlib import Path
import time
import datetime
import hvplot.pandas

#Import SKLearn Library and CLasses
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.datasets import make_classification
from sklearn import metrics

from joblib import dump, load
import joblib
import yfinance as yf
import pyfolio as pf
from pyfolio import timeseries 
import matplotlib.pyplot as plt
import empyrical
%matplotlib inline

In [None]:
# Naive model parameters, indexed by the first CSV column (looked up by parameter name later on).
# Values load as objects; they are cast to proper types in the lag-loop cell or hard-coded there.
naive_param_csv = 'ManualFiles/random_forest_nieve_model_parameters.csv'
df_random_forest_model_param = pd.read_csv(naive_param_csv, index_col=0)
df_random_forest_model_param

In [None]:
#### Model Parameters for reference purposes

In [None]:
# Reference printout of the RandomForestClassifier signature and its defaults.
# Each entry is one output line; trailing spaces are part of the printed text.
signature_lines = [
    'class sklearn.ensemble.RandomForestClassifier(',
    '    n_estimators=100, ',
    '    *, ',
    '    criterion="gini", ',
    '    max_depth=None, ',
    '    min_samples_split=2, ',
    '    min_samples_leaf=1, ',
    '    min_weight_fraction_leaf=0.0, ',
    '    max_features="auto", ',
    '    max_leaf_nodes=None, ',
    '    min_impurity_decrease=0.0, ',
    '    bootstrap=True, ',
    '    oob_score=False, ',
    '    n_jobs=None, ',
    '    random_state=None, ',
    '    verbose=0, ',
    '    warm_start=False, ',
    '    class_weight=None, ',
    '    ccp_alpha=0.0, ',
    '    max_samples=None ',
    ')',
]
msg = '\n'.join(signature_lines)
print(msg)

In [None]:
# Range of lags (in rows) applied to the feature set: 1 through 90 inclusive
signal_shift_range = range(1, 91)

# Run date stamped on every result row built below
run_date = datetime.datetime.now().strftime('%Y-%m-%d')

# Per-lag strategy performance rows (empyrical statistics)
results_list = []

# Per-lag best parameters reported by RandomizedSearchCV
best_params_list = []

# Per-lag capture statistics
capture_stats_list = []

# Note:  Sufficient historical data exists when shifting the dataset by "i" days will produce the same test data lengths
# testing_start = '2018-12-15' is hardcoded (test period length will expand on each subsequent day's testing of strategy results)
# First testing period = 713 days
training_end = '2018-12-14'
testing_start = '2018-12-15'

# Feature columns (credit-spread series) fed to the classifier
x_variables = ['BAMLH0A0HYM2', 'BAMLC0A0CM', 'BAMLC0A4CBBB', 'BAMLH0A1HYBB', 'BAMLH0A3HYC']

# --- RandomizedSearchCV settings (identical for every lag, so built once) ---
search_random_state = 0
input_cv_value = 5
input_return_train_score = True

forest_params = [
    {
        'max_depth': list(range(9, 6001)),
        'max_features': list(range(1, 6)),     # Future Enhancements:  Remove this model restriction
        'n_estimators': list(range(1, 1001)),  # Future Enhancements:  Starting at 30 (statistically significant level at 30 data point); 1 would be a simple decision tree
        # Starts at 2: scikit-learn rejects an integer min_samples_split below 2, so sampling 1
        # only produced failed candidate fits.
        'min_samples_split': list(range(2, 51))
    }
]

# --- Naive model parameters used for df_strat_lag (loop-invariant, so read once) ---
# .values.item() extracts the single scalar explicitly; int() on a 1-element array is deprecated in NumPy.
input_n_estimators = int(df_random_forest_model_param.loc['n_estimators'].values.item())
input_max_depth = int(df_random_forest_model_param.loc['max_depth'].values.item())
# 'sqrt' is what the former 'auto' setting meant for classifiers; 'auto' is no longer accepted by
# recent scikit-learn releases.
# NOTE(review): the notebook header lists max_features = 5, but this value has been hard-coded
# rather than read from the parameter file — confirm which is intended.
input_max_features = 'sqrt'
input_random_state = int(df_random_forest_model_param.loc['random_state'].values.item())
input_verbose = int(df_random_forest_model_param.loc['verbose'].values.item())

for i in signal_shift_range:
    print(f'\n****************** RUNNING FEATURE LAG {i} ******************')

    # The input files are re-read on every iteration on purpose: an earlier version loaded them
    # once outside the loop and that led to incorrect results.
    feature_set_pct_path = Path('AutoOutputFiles/df_key_credit_data_usa_adjusted_pct.csv')
    X = pd.read_csv(feature_set_pct_path, index_col="Date", parse_dates=True)

    # Shift the feature values down by i rows (the index is not shifted) and drop rows containing NaN
    X = X.shift(i).dropna()

    target_set_levels_path = Path('AutoOutputFiles/df_equity_data.csv')
    equity_data = pd.read_csv(target_set_levels_path, index_col="Date", parse_dates=True)

    # Keep only the dates present in both the equity data and the lagged feature set
    df_trading_signals = pd.concat([equity_data, X], axis='columns', join='inner')

    # Manual train/test split by date
    training_start = df_trading_signals.index.min().strftime('%Y-%m-%d')
    testing_end = df_trading_signals.index.max().strftime('%Y-%m-%d')

    # X & Y Training Datasets
    x_train = df_trading_signals.loc[training_start:training_end, x_variables]
    y_train = df_trading_signals.loc[training_start:training_end, 'PositiveReturn']

    # X and Y Testing Datasets
    x_test = df_trading_signals.loc[testing_start:testing_end, x_variables]
    y_test = df_trading_signals.loc[testing_start:testing_end, 'PositiveReturn']

    # RandomizedSearchCV for the current lagged feature set; its best_params_ are averaged
    # across lags in a later step.
    rfc = RandomForestClassifier(random_state=search_random_state)
    rgs = RandomizedSearchCV(
        rfc,
        forest_params,
        cv=input_cv_value,
        scoring='accuracy',
        return_train_score=input_return_train_score,
        # Seeded so reruns reproduce the same best_params_; offset by the lag so each lag
        # still samples its own set of candidates.
        random_state=search_random_state + i
    )
    rgs.fit(x_train, y_train)

    # best_params_ = dictionary
    best_params = rgs.best_params_

    # Append current iteration's best_params_ (list of lists will be used to construct a data frame)
    best_params_list.append(
        [
            run_date,
            testing_end,  # This also represents the period end date
            i,
            best_params
        ]
    )

    # Instantiate, define, & fit the naive-parameter model
    model = RandomForestClassifier(
        n_estimators=input_n_estimators,
        max_depth=input_max_depth,
        max_features=input_max_features,
        random_state=input_random_state,
        verbose=input_verbose
    )
    model.fit(x_train, y_train)

    # Predict "y" values from the x test dataset
    predictions = model.predict(x_test)

    # Assemble actual y data (y_test) and predicted y data side by side
    results = y_test.to_frame()
    results['PredictedValue'] = predictions

    # Add a difference column to data frame
    results['PredictedMinusActual'] = results['PredictedValue'] - results['PositiveReturn']

    # PositiveReturn already exists in the results data frame
    df_trading_signals = df_trading_signals.drop(columns=['PositiveReturn'])

    df_performance_results = pd.concat([results, df_trading_signals], axis='columns', join='inner')
    # Daily strategy return: the equity return on days predicted positive (1), zero otherwise
    df_performance_results['Equity_Position'] = df_performance_results['PredictedValue'] * df_performance_results['EquityPriceReturns']
    df_performance_results['Strategy_Cum_Rtn'] = (1 + df_performance_results['Equity_Position']).cumprod()
    df_performance_results['Equity_Cum_Rtn'] = (1 + df_performance_results['EquityPriceReturns']).cumprod()

    # Save each iteration as a csv file in folder AutoOutputFiles and append period end date to file
    fl_nm = f'AutoOutputFiles/Lag_{i}_df_performance_results_{testing_end}.csv'
    df_performance_results.to_csv(fl_nm)

    # UTC-localized copy of the results (pyfolio date requirement)
    df_pyfolio = df_performance_results.tz_localize("UTC")

    algo_performance_series = df_pyfolio['Equity_Position']

    # This is only required to produce stats related to index (i.e. beta, alpha, etc)
    Equity_performance_series = df_pyfolio['EquityPriceReturns']

    # We don't need the index performance each time - will be obtained in rfc_model once optimal shift has been determined

    # Append each iteration's results to results_list which is later used to construct a data frame
    results_list.append(
        [
            run_date,
            testing_end,  # This also represents the period end date
            i,
            input_n_estimators,
            input_max_depth,
            input_max_features,
            input_random_state,
            input_verbose,
            empyrical.annual_return(algo_performance_series),
            empyrical.annual_volatility(algo_performance_series),
            empyrical.sharpe_ratio(algo_performance_series),
            empyrical.calmar_ratio(algo_performance_series),
            empyrical.max_drawdown(algo_performance_series),
            empyrical.sortino_ratio(algo_performance_series),
            empyrical.alpha(algo_performance_series, Equity_performance_series),
            empyrical.beta(algo_performance_series, Equity_performance_series)
        ]
    )

    # ---- Capture statistics for the test period ----
    # Total trading days in the test period; PredictedValue holds 0/1 so count (not sum) is used
    total_days = df_performance_results['PredictedValue'].count()

    # Positive-return days: only rows equal to 1 are selected, so their sum is the day count
    total_pos_days = df_performance_results.loc[(df_performance_results['PositiveReturn'] == 1), 'PositiveReturn'].sum()

    # Negative-return days are the remainder
    total_neg_days = total_days - total_pos_days

    # Days the strategy predicted a positive day, i.e. was invested in the equity (PredictedValue == 1)
    strat_days_invested = df_performance_results['PredictedValue'].sum()

    # Days held in cash
    strat_days_in_cash = total_days - strat_days_invested

    # Upside capture: invested in the equity on a positive-return day
    strat_days_invested_positive = df_performance_results.loc[(df_performance_results['PositiveReturn'] == 1) & (df_performance_results['PredictedValue'] == 1), 'PredictedValue'].count()

    # Downside capture.
    # NOTE(review): this filter counts negative-return days on which the strategy was NOT invested
    # (PredictedValue == 0), whereas the variable name and the extreme-negative statistic below
    # count invested days (PredictedValue == 1) — confirm which definition is intended.
    strat_days_invested_negative = df_performance_results.loc[(df_performance_results['PositiveReturn'] == 0) & (df_performance_results['PredictedValue'] == 0), 'PredictedValue'].count()

    # Total days where equity delivered return greater than or equal to 1%
    total_days_pos_extreme = df_performance_results.loc[(df_performance_results['EquityPriceReturns'] >= 0.01), 'EquityPriceReturns'].count()

    # Total days where equity delivered return less than or equal to -1%
    total_days_neg_extreme = df_performance_results.loc[(df_performance_results['EquityPriceReturns'] <= -0.01), 'EquityPriceReturns'].count()

    # Extreme positive days on which the strategy was invested
    total_days_pos_extreme_invested = df_performance_results.loc[(df_performance_results['EquityPriceReturns'] >= 0.01) & (df_performance_results['PredictedValue'] == 1), 'EquityPriceReturns'].count()

    # Extreme negative days on which the strategy was invested
    total_days_neg_extreme_invested = df_performance_results.loc[(df_performance_results['EquityPriceReturns'] <= -0.01) & (df_performance_results['PredictedValue'] == 1), 'EquityPriceReturns'].count()

    # Append each iteration's statistics to capture_stats_list which is later used to construct a data frame
    capture_stats_list.append(
        [
            run_date,
            testing_end,  # This also represents the period end date
            i,
            total_days,
            total_pos_days,
            total_neg_days,
            strat_days_invested,
            strat_days_in_cash,
            strat_days_invested_positive,
            strat_days_invested_negative,
            total_days_pos_extreme,
            total_days_neg_extreme,
            total_days_pos_extreme_invested,
            total_days_neg_extreme_invested
        ]
    )

    # Save the fitted model: first argument is the model object, second is the output path
    fl_nm = f'model_candidates/nieve/Lag_{i}_random_forest_{testing_end}.joblib'
    joblib.dump(model, fl_nm, compress=3)

    # Release the per-lag frames before the next iteration
    del X
    del equity_data
    del df_trading_signals
    del results
    del df_performance_results
    del df_pyfolio

In [None]:
def unpack_dict_values(**kwargs):
    '''
    Unpack random-forest parameter keyword arguments into individual values.

    Every recognised key that is present is echoed with print(). A key that is absent
    is returned as None (previously a missing key raised UnboundLocalError at the
    return statement).

    Returns a tuple: (n_estimators, min_samples_split, max_features, max_depth)
    '''
    # Labels are kept exactly as originally printed (note the differing spacing before the colon)
    if 'n_estimators' in kwargs:
        print("n_estimators: ", kwargs['n_estimators'])

    if 'min_samples_split' in kwargs:
        print("min_samples_split : ", kwargs['min_samples_split'])

    if 'max_features' in kwargs:
        print("max_features : ", kwargs['max_features'])

    if 'max_depth' in kwargs:
        print("max_depth : ", kwargs['max_depth'])

    return (
        kwargs.get('n_estimators'),
        kwargs.get('min_samples_split'),
        kwargs.get('max_features'),
        kwargs.get('max_depth'),
    )

In [None]:
# Display the raw best_params_list to confirm it was constructed correctly
# (each entry appended by the lag loop is [run_date, period end date, lag, best_params dict])
best_params_list

In [None]:
# Each best_params_list entry is [run_date, period end date, feature lag, best_params dict].
# Flatten the dict into plain values so every row is a simple list that pd.DataFrame can
# consume directly.

# List of lists holding the flattened rows
unpack_list = []

# Unpack each entry in the loop header and pull the four parameters out of the dict by key
for run_date, testing_end, feature_lag, param_dict in best_params_list:
    unpack_list.append(
        [
            run_date,
            testing_end,
            feature_lag,
            param_dict['n_estimators'],
            param_dict['min_samples_split'],
            param_dict['max_features'],
            param_dict['max_depth'],
        ]
    )

In [None]:
# Print every flattened row to confirm unpack_list was constructed correctly
for unpacked_row in unpack_list:
    print(unpacked_row)

In [None]:
# Construct the best-parameter DataFrame from unpack_list
column_names = [
    'RunDate',
    'PeriodEndDate',
    'FeatureLag',
    'n_estimators',
    'min_samples_split',
    'max_features',
    'max_depth',
]
df_best_params = pd.DataFrame(data=unpack_list, columns=column_names)

# Historical copy, stamped with the period end date
fl_name = f'AutoOutputFiles/df_best_params_{testing_end}.csv'
df_best_params.to_csv(fl_name)

# Most recent copy (may get imported into other notebooks)
df_best_params.to_csv('AutoOutputFiles/df_best_params.csv')
df_best_params

In [None]:
# Print every capture-statistics row to confirm the list was constructed correctly
for capture_row in capture_stats_list:
    print(capture_row)

In [None]:
# Construct the DataFrame for capture statistics
# 'EquityDays>=1%' and 'EquityDays<=-1%' are the extreme-return days
column_names = [
    'RunDate',
    'PeriodEndDate',
    'FeatureLag',
    'EquityDays',
    'EquityDaysPositive',
    'EquityDaysNegative',
    'StrategyDaysInEquity',
    'StrategyDaysInCash',
    'StrategyCapturePositive',
    'StrategyCaptureNegative',
    'EquityDays>=1%',
    'EquityDays<=-1%',
    'StrategyCaptureExtremePositive',
    'StrategyCaptureExtremeNegative',
]
df_capture_stats = pd.DataFrame(data=capture_stats_list, columns=column_names)

# Historical copy, stamped with the period end date
fl_name = f'AutoOutputFiles/df_capture_stats_{testing_end}.csv'
df_capture_stats.to_csv(fl_name)

# Most recent copy (may get imported into other notebooks)
df_capture_stats.to_csv('AutoOutputFiles/df_capture_stats.csv')
df_capture_stats

In [None]:
def calculate_descriptive_stats(data):
    '''
    Return the four descriptive statistics of a column as a tuple:
    (mean, standard deviation, skew, kurtosis).

    data is a single DataFrame column, i.e. an object exposing
    .mean(), .std(), .skew() and .kurt()
         e.g. data = df_capture_stats['StrategyDaysInEquity']
    '''
    return data.mean(), data.std(), data.skew(), data.kurt()

In [None]:
### Capture Statistics For Current Run

In [None]:
# Descriptive statistics and bar chart of StrategyDaysInEquity across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_capture_stats['StrategyDaysInEquity'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Strategy Days Invested In Equity By Feature Lag',
    x='FeatureLag',
    y='StrategyDaysInEquity',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_capture_stats.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of StrategyDaysInCash across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_capture_stats['StrategyDaysInCash'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Strategy Days Invested In Cash By Feature Lag',
    x='FeatureLag',
    y='StrategyDaysInCash',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_capture_stats.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of StrategyCapturePositive across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_capture_stats['StrategyCapturePositive'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Strategy Positive Days Capture By Feature Lag',
    x='FeatureLag',
    y='StrategyCapturePositive',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_capture_stats.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of StrategyCaptureNegative across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_capture_stats['StrategyCaptureNegative'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Strategy Negative Days Capture By Feature Lag',
    x='FeatureLag',
    y='StrategyCaptureNegative',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_capture_stats.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of StrategyCaptureExtremePositive across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_capture_stats['StrategyCaptureExtremePositive'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Strategy Extreme Positive Days Capture (>= 1%) By Feature Lag',
    x='FeatureLag',
    y='StrategyCaptureExtremePositive',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_capture_stats.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Calculate & Plot descriptive statistics for StrategyCaptureExtremeNegative
# (days with an equity return <= -1% on which the strategy was invested)
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_capture_stats['StrategyCaptureExtremeNegative'])

print(f'DESCRIPTIVE STATISTICS:\n')
print(f'Mean = {mean}')
print(f'Standard Deviation = {standard_dev}')
print(f'Skew = {skew}')
print(f'Kurtosis = {kurtosis}\n')

# https://hvplot.holoviz.org/user_guide/Customization.html
# Title threshold corrected to "<= -1%": the statistic is built from returns <= -0.01, not <= 0.01
df_capture_stats.hvplot.bar(
    title = 'Strategy Extreme Negative Days Capture (<= -1%) By Feature Lag', 
    y='StrategyCaptureExtremeNegative', 
    x='FeatureLag', 
    xlabel ='Feature Lag (Days)', 
    height=500,
    width = 1500

).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of the best n_estimators found for each feature lag
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_best_params['n_estimators'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='N Estimators By Feature Lag',
    x='FeatureLag',
    y='n_estimators',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_best_params.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of the best min_samples_split found for each feature lag
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_best_params['min_samples_split'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Minimum Sample Splits By Feature Lag',
    x='FeatureLag',
    y='min_samples_split',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_best_params.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of the best max_depth found for each feature lag
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_best_params['max_depth'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Maximum Depth By Feature Lag',
    x='FeatureLag',
    y='max_depth',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_best_params.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of the best max_features found for each feature lag
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(df_best_params['max_features'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Maximum Features By Feature Lag',
    x='FeatureLag',
    y='max_features',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
df_best_params.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Strategy Performance Results Current Run

In [None]:
# Print every strategy performance row to confirm results_list was constructed correctly
for performance_row in results_list:
    print(performance_row)

In [None]:
# Build the strategy performance DataFrame from results_list

# 'Cumulative Return' is an array and is not included below; not an issue since annualized return is available.
# The lag loop saves a csv file for each iteration, and cumulative return can be obtained from those files.
strat_lag_columns = [
    'RunDate', 'PeriodEndDate', 'FeatureLag',
    'n_estimators', 'max_depth', 'max_features', 'random_state', 'verbose',
    'Annual Return', 'Annual Volatility', 'Sharpe Ratio',
    'Calmar Ratio', 'Max Drawdown', 'Sortino Ratio',
    'Alpha', 'Beta',
]
df_strat_lag = pd.DataFrame(data=results_list, columns=strat_lag_columns)

# Historical copy, stamped with the period end date
fl_nm = f'AutoOutputFiles/df_strat_lag_{testing_end}.csv'
df_strat_lag.to_csv(fl_nm)

# This file is imported into notebook 'Current_rfc_model.ipynb' and is overwritten on each execution of this notebook
df_strat_lag.to_csv('AutoOutputFiles/df_strat_lag.csv')
df_strat_lag

In [None]:
# Columns needed by the performance charts below: the lag plus each performance statistic
var_list = [
    'FeatureLag',
    'Annual Return',
    'Annual Volatility',
    'Sharpe Ratio',
    'Calmar Ratio',
    'Max Drawdown',
    'Sortino Ratio',
    'Alpha',
    'Beta',
]
plot_data = df_strat_lag.loc[:, var_list]
plot_data.head()

In [None]:
# Descriptive statistics and bar chart of Annual Return across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(plot_data['Annual Return'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

# hvplot customization reference: https://hvplot.holoviz.org/user_guide/Customization.html
bar_options = dict(
    title='Strategy Annualized Return Per Feature Lag (Days)',
    x='FeatureLag',
    y='Annual Return',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
plot_data.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of Annual Volatility across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(plot_data['Annual Volatility'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

bar_options = dict(
    title='Strategy Annual Volatility Per Feature Lag (Days)',
    x='FeatureLag',
    y='Annual Volatility',
    xlabel='Feature Lag (Days)',
    height=500,
    width=1500,
)
plot_data.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Calculate & Plot descriptive statistics for Sharpe Ratio
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(plot_data['Sharpe Ratio'])

# Header corrected: it previously read "FOR FEATURE LAGS MAXIMUM DEPTH", which does not
# describe the Sharpe Ratio statistics printed here; it now matches the other cells.
print(f'DESCRIPTIVE STATISTICS:\n')
print(f'Mean = {mean}')
print(f'Standard Deviation = {standard_dev}')
print(f'Skew = {skew}')
print(f'Kurtosis = {kurtosis}\n')

plot_data.hvplot.bar(
    title = 'Strategy Sharpe Ratio Per Feature Lag (Days)', 
    y='Sharpe Ratio', 
    x='FeatureLag', 
    xlabel ='Feature Lag (Days)', 
    height=500,
    width = 1500
).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of Sortino Ratio across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(plot_data['Sortino Ratio'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

bar_options = dict(
    title='Strategy Sortino Ratio Per Feature Lag (Days)',
    x='FeatureLag',
    y='Sortino Ratio',
    xlabel='Feature Lag (Days)',
    colorbar=True,
    height=500,
    width=1500,
)
plot_data.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of Calmar Ratio across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(plot_data['Calmar Ratio'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

bar_options = dict(
    title='Strategy Calmar Ratio Per Feature Lag (Days)',
    x='FeatureLag',
    y='Calmar Ratio',
    xlabel='Feature Lag (Days)',
    colorbar=True,
    height=500,
    width=1500,
)
plot_data.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of Max Drawdown across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(plot_data['Max Drawdown'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

bar_options = dict(
    title='Strategy Max Drawdown Per Feature Lag (Days)',
    x='FeatureLag',
    y='Max Drawdown',
    xlabel='Feature Lag (Days)',
    colorbar=True,
    height=500,
    width=1500,
)
plot_data.hvplot.bar(**bar_options).opts(xrotation=90)

In [None]:
# Calculate & Plot descriptive statistics for Alpha
# (this cell previously computed and plotted 'Calmar Ratio' under the Alpha title,
#  duplicating the Calmar cell and never showing Alpha)
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(plot_data['Alpha'])

print(f'DESCRIPTIVE STATISTICS:\n')
print(f'Mean = {mean}')
print(f'Standard Deviation = {standard_dev}')
print(f'Skew = {skew}')
print(f'Kurtosis = {kurtosis}\n')

plot_data.hvplot.bar(
    title = 'Strategy Alpha Per Feature Lag (Days)', 
    y='Alpha', 
    x='FeatureLag', 
    xlabel ='Feature Lag (Days)',
    colorbar = True,
    height=500,
    width = 1500
).opts(xrotation=90)

In [None]:
# Descriptive statistics and bar chart of Beta across feature lags
mean, standard_dev, skew, kurtosis = calculate_descriptive_stats(plot_data['Beta'])

print(
    'DESCRIPTIVE STATISTICS:\n',
    f'Mean = {mean}',
    f'Standard Deviation = {standard_dev}',
    f'Skew = {skew}',
    f'Kurtosis = {kurtosis}\n',
    sep='\n',
)

bar_options = dict(
    title='Strategy Beta Per Feature Lag (Days)',
    x='FeatureLag',
    y='Beta',
    xlabel='Feature Lag (Days)',
    colorbar=True,
    height=500,
    width=1500,
)
plot_data.hvplot.bar(**bar_options).opts(xrotation=90)