In [None]:
import utils
import math
import pickle
import numpy as np
import pandas as pd
from datetime import datetime

from xgboost import XGBClassifier
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

# Train Meta Models for All Futures

In [None]:
# define variables and paths
# Root folder under which every base model's predictions are stored.
path = 'meta_model_predictions/'

# Base-model name -> sub-folder (relative to `path`) holding that model's
# per-future prediction CSVs. The names double as feature column names.
model_paths = dict(
    logreg='categorical/logreg/pct_macro_thresh',
    rf='categorical/rf/pct_tech_macro',
    xgb='categorical/xgb/pct_tech',
    arima='arimaEnsemble',
    emaStrategy='emaStrategy',
    fourCandleHammer='fourCandleHammer',
    swingSetup='swingSetup',
)

# Training and validation windows; both bounds of each window are applied
# inclusively by the date filters that consume them.
train_start = datetime(year=2018, month=10, day=1)
train_end = datetime(year=2020, month=9, day=30)
val_start = datetime(year=2020, month=10, day=1)
val_end = datetime(year=2020, month=12, day=31)

In [None]:
# Hyper-parameter search space for the XGBoost meta model:
# 1 booster x 3 learning rates x 3 gammas x 4 depths = 36 combinations.
xgbParams = [
    {
        'booster': ['gbtree'],
        'learning_rate': [0.01, 0.1, 0.3],  # default 0.3
        'gamma': [0, 0.5, 1],               # higher means more regularization
        'max_depth': [2, 4, 6, 8],          # default 6
    }
]

# parameter grid
# Materialised as a list so individual parameter sets can be picked by index.
parameter_grid = [*ParameterGrid(xgbParams)]

In [None]:
# Train one XGBoost meta model per (future, hyper-parameter set) on the
# base models' predictions and keep its validation-window predictions.
#
# future_preds_dict maps (future, params_string) -> DataFrame indexed by
# validation date with a single 'preds' column. Fitted models themselves are
# not persisted; only the predictions are kept.
future_preds_dict = dict()

# Feature columns, one per base model. An explicit list is used instead of
# indexing a DataFrame with a dict_keys view.
feature_cols = list(model_paths)

for future in tqdm(utils.futuresList):
    # merge base model probability predictions (one column per base model)
    future_df = None
    for model_name, model_path in model_paths.items():
        data = pd.read_csv(f'{path}{model_path}/{future}.csv')
        data.columns = ['date', model_name]
        data['date'] = pd.to_datetime(data['date'])
        data = data.set_index('date')
        # filter data within date range
        data = data.loc[(data.index >= train_start) & (data.index <= val_end)]
        # `None` (not "empty frame") marks "nothing merged yet": an empty first
        # frame must still contribute its column, otherwise the feature
        # selection below would fail on a missing column.
        if future_df is None:
            future_df = data.copy()
        else:
            future_df = pd.merge(future_df, data, on=['date'], how='outer')

    # load and extract y_variable data
    y_var = "LONG_SHORT" # DO NOT CHANGE THIS
    df = utils.prepare_data(future)
    df = df.loc[(df.index >= train_start) & (df.index <= val_end)]
    # Align labels to the features on the date index. Assigning `.values`
    # positionally would raise on a length mismatch and, worse, silently
    # attach labels to the wrong dates whenever the two date sets differ.
    # NOTE(review): assumes utils.prepare_data returns a frame with a unique
    # date index comparable to the parsed CSV dates - confirm.
    future_df[y_var] = df[y_var].reindex(future_df.index)

    # train test split
    train_df = future_df.loc[(future_df.index >= train_start) & (future_df.index <= train_end)]
    # Rows without a label (dates absent from the label frame) cannot be trained on.
    train_df = train_df.dropna(subset=[y_var])
    if train_df.empty:
        raise ValueError(f'{future}: no labelled training rows after aligning {y_var} on date')
    val_df = future_df.loc[(future_df.index >= val_start) & (future_df.index <= val_end)]

    X_train = train_df[feature_cols]
    y_train = train_df[y_var]

    X_val = val_df[feature_cols]

    for param_set in parameter_grid:
        # Compact identifier of this parameter set; used as part of the dict key.
        params = f"lr{param_set['learning_rate']}_g{param_set['gamma']}_d{param_set['max_depth']}"

        # train meta model
        xgb_model = XGBClassifier(objective='binary:logistic', eval_metric='logloss', **param_set)
        xgb_model.fit(X_train, y_train)

        # predict prob long (second column of predict_proba)
        future_preds = xgb_model.predict_proba(X_val)[:, 1]

        # store predictions in a dictionary
        future_preds_dict[future, params] = pd.DataFrame({'preds': future_preds}, index=X_val.index)

In [None]:
# save dictionary
# Persist the collected predictions so they can be reloaded without retraining.
with open("meta_model_predictions/future_preds_dict_2.pkl", mode="wb") as pickle_file:
    pickle.dump(future_preds_dict, pickle_file)

In [None]:
# saved_dict = pickle.load(open("meta_model_predictions/future_preds_dict_2.pkl", "rb"))

# Retrieve Sharpe from Quantiacs
This step has been transferred to `meta_model_validation.py`.

In [None]:
import quantiacsToolbox

from strategy import (
    basic_strategy, 
    long_only,
    short_only,
    fixed_threshold_strategy, 
    perc_threshold_strategy,
    futures_only,
    futures_hold,
    cash_and_futures,
)

# Index into `parameter_grid` of the hyper-parameter set to backtest.
# Edit by hand to evaluate a different set (a loop over every index,
# `for i in range(len(parameter_grid))`, was sketched here originally).
i = 17 ## change this

param_set = parameter_grid[i]

# Same identifier format as the keys of the saved predictions dictionary.
params = f"lr{param_set['learning_rate']}_g{param_set['gamma']}_d{param_set['max_depth']}"

# retrieve preds
# NOTE(review): the save cell of this notebook writes
# "meta_model_predictions/future_preds_dict_2.pkl", whereas this cell loads
# "future_preds_dict.pkl" - confirm which file is intended.
# The file is opened in a `with` block so the handle is closed deterministically.
# Only load pickles produced by this project: unpickling untrusted files can
# execute arbitrary code.
with open("meta_model_predictions/future_preds_dict.pkl", "rb") as f:
    future_preds_dict = pickle.load(f)

# Keep, for every future, only the predictions of the selected parameter set.
preds_dict_final = {
    future: future_preds_dict[future, params].copy()
    for future in utils.futuresList
}
# define class
class metaModelValidation(object):
    """Quantiacs trading system that replays saved meta-model predictions.

    No model is fitted here: for each simulated date the prediction stored
    under ``settings['saved_predictions']`` is looked up per future and
    converted into a position via the imported strategy helpers.
    """

    # throw preds into quantiacs
    def myTradingSystem(self, DATE, settings):
        ''' Build the position vector for the most recent date in ``DATE``.

        Args:
            DATE: sequence of dates; only the last entry is used and it is
                parsed as ``YYYYMMDD``.
            settings: settings dict as returned by ``mySettings``; it is
                updated in place with the latest sign, magnitude and position.

        Returns:
            ``(position, settings)``.
        '''
        # The lookup date is identical for every future, so parse it once.
        date_label = str(DATE[-1])
        trade_date = datetime.strptime(date_label, '%Y%m%d')

        # Get saved X variables
        prediction = pd.DataFrame(index=utils.futuresList)
        for future in utils.futuresList:
            # read data
            try:
                # First column of the row stored for this date. `.iloc[0]` is
                # explicit positional access; `row[0]` on a label-indexed
                # Series is deprecated and would end up in the fallback below.
                future_pred = settings['saved_predictions'][future].loc[trade_date].iloc[0]
                prediction.loc[future, 'meta'] = future_pred
            except Exception:
                # Best effort: a missing future/date falls back to a prediction
                # of 0. `Exception` (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                print('ERROR: ', future, date_label)
                prediction.loc[future, 'meta'] = 0

        sign = utils.sign(prediction)
        magnitude = utils.magnitude(prediction)

        position = basic_strategy(sign['meta'], magnitude['meta'])

        # Cash-futures strategy
        position = futures_only(position)

        # Update persistent data across runs
        settings['sign'].append(sign)
        settings['magnitude'].append(magnitude)
        settings['previous_position'] = position

        return position, settings

    def mySettings(self):
        ''' Return the backtest settings dict.

        Reads the module-level ``preds_dict_final`` for the saved predictions,
        so that variable must be defined before the backtest is run.
        '''
        settings = {}
        settings['markets'] = utils.futuresAllList
        settings['beginInSample'] = '20181020'
        settings['endInSample'] = '20201231'
        settings['lookback'] = 504
        settings['budget'] = 10**6
        settings['slippage'] = 0.05

        # Stuff to persist
        settings['saved_predictions'] = preds_dict_final ## update this
        settings['sign'] = []
        settings['magnitude'] = []

        return settings

# Backtest the selected parameter set and record its Sharpe ratio.
results = quantiacsToolbox.runts(metaModelValidation, plotEquity=False)
sharpe = results["stats"]["sharpe"]

# Append one (params, sharpe) row to the running results file.
sharpe_results_path = "meta_model_predictions/sharpe_results.csv"
new_row = pd.DataFrame([{'params': params, 'sharpe': sharpe}])
try:
    previous_results = pd.read_csv(sharpe_results_path)
except FileNotFoundError:
    # First run: there is no results file yet, so start one from this row.
    previous_results = None

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
if previous_results is None:
    sharpe_results = new_row
else:
    sharpe_results = pd.concat([previous_results, new_row], ignore_index=True)
sharpe_results.to_csv(sharpe_results_path, index=False)