In [1]:
import utils
import models.categorical
import math
import pickle
import numpy as np
import pandas as pd
from datetime import date

from sklearn.model_selection import ParameterGrid
from tqdm import tqdm

import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by the libraries used below — consider narrowing it to the
# specific category/module that is noisy.
warnings.filterwarnings("ignore")

In [2]:
# Hyperparameter search spaces, one per technical-indicator strategy.
# Each is a single-element list of {parameter name: candidate values} so it
# can be handed directly to sklearn's ParameterGrid.

# Candidates for the FourCandleHammer strategy.
fourCandleHammerParams = [
    dict(N=[30, 40], highFactor=[0.9, 0.95], lowFactor=[1.05, 1.1]),
]

# Candidates for the EMA strategy (short/long window lengths).
emaParams = [
    dict(shortTermDays=[20, 40], longTermDays=[50, 200]),
]

# Candidates for the swing-setup strategy.
swingSetupParams = [
    dict(
        shortTermDays=[20, 30],
        longTermDays=[40, 60],
        highFactor=[1.002, 1.005],
        lowFactor=[0.995, 0.998],
    ),
]

# start_date is passed as `start_index` to the walk-forward routine in the
# tuning cells; end_date is not referenced in the cells visible here.
start_date = date(year=2010, month=1, day=1)
end_date = date(year=2021, month=1, day=1)

# Futures to tune over, as published by the project's utils module.
futuresList = utils.futuresList

## Hyperparameter Tuning for FourCandleHammer Strategy

In [3]:
# Grid-search the FourCandleHammer parameters: for every future, score each
# parameter combination with walk-forward validation and write one ranked CSV.
parameter_grid = list(ParameterGrid(fourCandleHammerParams))
y_var = "LONG_SHORT"
file_dir = "fourCandleHammer/perc/"

for future in tqdm(futuresList):
    print(future)

    # Load this future's data and split it into features / target.
    df = utils.prepare_data(future)
    X_vars = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL']
    X_df, y_df = utils.generate_X_y(df, X_vars=X_vars, y_var=y_var)
    cost_df = df["CLOSE_PCT"]

    # One row per parameter combination; filled in by the loop below.
    agg_results_collated = pd.DataFrame(
        index=list(range(len(parameter_grid))),
        columns=["accuracy_SMA", "opp_cost_SMA"],
    )
    win_results_collated = []

    # Walk-forward validation for each parameter combination.
    for i, param_set in enumerate(parameter_grid):
        strategy_output = utils.fourCandleHammer(
            X_df['CLOSE'],
            param_set['N'],
            param_set['highFactor'],
            param_set['lowFactor'],
        )
        win_results, agg_results = models.categorical.walk_forward_techIndicators(
            strategy_output=strategy_output,
            X=X_df,
            y=y_df,
            cost_weight=cost_df,
            rolling=True,
            max_windows=100,
            start_index=start_date,
        )
        win_results_collated.append(win_results)

        # Keep only the "SMA" row of the aggregate metrics.
        for metric in ("accuracy", "opp_cost"):
            agg_results_collated.loc[i, f"{metric}_SMA"] = agg_results.loc["SMA", metric]

    # Join parameters with their scores, rank best-first, and persist.
    parameter_df = pd.DataFrame.from_records(parameter_grid)
    combined_df = pd.concat([parameter_df, agg_results_collated], axis=1)
    combined_df = combined_df.sort_values(
        by=["accuracy_SMA", "opp_cost_SMA"], ascending=False
    )
    combined_df.to_csv(f"model_metrics/categorical/{file_dir}{future}.csv", index=False)

  0%|          | 0/88 [00:00<?, ?it/s]F_AD
  1%|          | 1/88 [00:40<58:19, 40.22s/it]F_BO
  2%|▏         | 2/88 [01:21<57:55, 40.42s/it]F_BP
  3%|▎         | 3/88 [02:05<59:07, 41.74s/it]F_C
  5%|▍         | 4/88 [03:07<1:06:53, 47.78s/it]F_CC
  6%|▌         | 5/88 [04:09<1:11:54, 51.98s/it]F_CD
  7%|▋         | 6/88 [05:08<1:14:04, 54.20s/it]F_CL
  8%|▊         | 7/88 [06:00<1:11:55, 53.27s/it]F_CT
  9%|▉         | 8/88 [06:42<1:06:43, 50.05s/it]F_DX
 10%|█         | 9/88 [07:30<1:04:57, 49.34s/it]F_EC
 11%|█▏        | 10/88 [08:39<1:11:52, 55.28s/it]F_ED
 12%|█▎        | 11/88 [09:44<1:14:33, 58.10s/it]F_ES
 14%|█▎        | 12/88 [10:33<1:10:16, 55.48s/it]F_FC
 15%|█▍        | 13/88 [11:20<1:06:12, 52.97s/it]F_FV
 16%|█▌        | 14/88 [11:58<59:49, 48.51s/it]  F_GC
 17%|█▋        | 15/88 [12:38<55:42, 45.79s/it]F_HG
 18%|█▊        | 16/88 [13:30<57:13, 47.69s/it]F_HO
 19%|█▉        | 17/88 [14:11<54:14, 45.84s/it]F_JY
 20%|██        | 18/88 [15:14<59:28, 50.98s/it]F_KC
 22%|██▏ 

ValueError: Found input variables with inconsistent numbers of samples: [531, 37]

In [None]:
# Ranked parameter/score table for the most recent future the tuning loop
# finished; combined_df is overwritten on every loop iteration.
combined_df

In [None]:
# Unsorted per-parameter-set metrics for the future the tuning loop touched
# last; the frame is re-created at the start of each future, so after an
# interrupted run it may be only partially filled.
agg_results_collated

## Hyperparameter Tuning for EMA Strategy

In [None]:
# Grid-search the EMA parameters: for every future, score each
# (shortTermDays, longTermDays) combination with walk-forward validation and
# write one ranked CSV per future.
parameter_grid = list(ParameterGrid(emaParams))
y_var = "LONG_SHORT"
file_dir = "ema/perc/"

for future in tqdm(futuresList):
    print(future)

    # load data
    df = utils.prepare_data(future)

    # generate X vars
    X_vars = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL']

    # load X and y
    X_df, y_df = utils.generate_X_y(df, X_vars=X_vars, y_var=y_var)
    cost_df = df["CLOSE_PCT"]

    # one row per parameter combination; filled in by the loop below
    agg_results_collated = pd.DataFrame(index=list(range(len(parameter_grid))),
                                        columns=["accuracy_SMA", "opp_cost_SMA"])
    win_results_collated = []

    # run walk forward validation
    for i, param_set in enumerate(parameter_grid):
        # FIX: this cell previously called utils.fourCandleHammer with
        # param_set['N'] / ['highFactor'] / ['lowFactor'], keys that do not
        # exist in the EMA grid (KeyError on the first iteration) and that
        # belong to a different strategy.
        # NOTE(review): assumes utils exposes the EMA strategy as
        # `ema(close, shortTermDays, longTermDays)` — confirm the helper's
        # name and argument order against utils before running.
        strategy_output = utils.ema(
            X_df['CLOSE'], param_set['shortTermDays'], param_set['longTermDays']
        )
        win_results, agg_results = models.categorical.walk_forward_techIndicators(
            strategy_output = strategy_output, X = X_df, y = y_df, cost_weight = cost_df, rolling = True,
            max_windows = 100, start_index = start_date
        )
        win_results_collated.append(win_results)
        # keep only the "SMA" row of the aggregate metrics
        agg_results_collated.loc[i, "accuracy_SMA"] = agg_results.loc["SMA", "accuracy"]
        agg_results_collated.loc[i, "opp_cost_SMA"] = agg_results.loc["SMA", "opp_cost"]

    # save parameters alongside their scores, ranked best-first
    parameter_df = pd.DataFrame.from_records(parameter_grid)
    combined_df = pd.concat([parameter_df, agg_results_collated], axis=1)
    combined_df = combined_df.sort_values(by=["accuracy_SMA", "opp_cost_SMA"], ascending=False)
    combined_df.to_csv(f"model_metrics/categorical/{file_dir}{future}.csv", index=False)