### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

if 'COLAB_GPU' in os.environ:
    !git clone https://github.com/impulsecorp/PickStocks.git
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
warnings.filterwarnings("ignore")
import system
from system import *
# Small hack to prevent a Colab error: if the first star-import of `datablock`
# fails, retry it once. Only ordinary exceptions trigger the retry, so a
# KeyboardInterrupt / SystemExit during the import is no longer swallowed.
try:
    from datablock import *
except Exception:
    from datablock import *
# Display `seed` as the cell output (the name comes from one of the star-imports
# above — presumably the random seed in use; confirm in system/datablock).
seed

In [None]:
# Enable inline matplotlib figures. Note that %pylab also star-imports numpy and
# matplotlib.pyplot names into the notebook namespace, so later cells may rely on them.
%pylab inline

### Setup

In [None]:
# Bar period requested from get_data ('D' — presumably daily bars; confirm in get_data).
data_timeperiod = 'D'
# Load the SPY series (nrows=None, i.e. no row limit is passed) and run it through
# procdata_lite in one step; `data` ends up holding the processed frame.
data = procdata_lite(
    get_data('SPY', period=data_timeperiod, nrows=None)
)

In [None]:
# For inspection: print the (rows, columns) shape, then show the first rows of the
# processed frame as the cell output.
print(data.shape)
data.head()

In [None]:
# Split / preprocessing switches, set as attributes on the imported `system` module.
system.train_set_end = 0.7 # fraction of the rows (from the start) that forms the training set; 1.0 means all data is training set
system.val_set_end = 1.0    # fraction of the rows at which the validation set ends; 1.0 means no test set
system.balance_data = 1  # truthy: the HPO cells in this notebook then apply SMOTE oversampling to the training data
system.scale_data = 1  # NOTE(review): not read by the visible cells; presumably toggles feature scaling inside `system` — confirm

### ------------------------------------------------------------------------------------------------------------

### LogisticRegression

In [None]:
# Train LogisticRegression classifier on train data
# No hyperparameter overrides are passed. The two returned values (bound to clf and
# scaler) are what the backtest cell below hands to qbacktest.
clf, scaler = train_classifier(LogisticRegression, data)

In [None]:
# Test on val data
# Uses whichever clf/scaler pair was trained most recently, so run this right after
# the training cell above. qbacktest returns three values: equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### LogisticRegression + HPO

In [None]:
# Train LogisticRegression classifier on train data, but optimize it with HPO first

# Build the matrix the hyperparameter search runs on: the first train_set_end
# fraction of the rows, standardized, and (when system.balance_data is truthy)
# oversampled with SMOTE.
X_train, y_train = get_clean_Xy(data.iloc[0:int(data.shape[0] * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
if system.balance_data:
    # Apply SMOTE oversampling to balance the training data
    sm = SMOTE(random_state=newseed())
    X_train, y_train = sm.fit_resample(X_train, y_train)

# Search space.
# "dual" is deliberately NOT searched: scikit-learn implements the dual formulation
# only for the liblinear solver, and no solver is selected here, so dual=True is
# rejected by the default solver (both during the search and in the final fit).
logreg_space = {
    "C": hp.loguniform("C", -5, 2),  # log-uniform, i.e. C in [exp(-5), exp(2)]
    "max_iter": hp.choice("max_iter", range(5, 501)),
    "fit_intercept": hp.choice("fit_intercept", (True, False)),
}
best_hyperparams = optimize_model(LogisticRegression, 'LogisticRegression', logreg_space,
                                  X_train, y_train, max_evals=100)

# Final fit with the tuned settings; this rebinds `scaler` to the one returned by
# train_classifier, replacing the scaler fitted above for the search.
clf, scaler = train_classifier(LogisticRegression, data, **best_hyperparams)

In [None]:
# Test on val data
# clf/scaler here are the HPO-tuned pair from the cell above (the most recently
# trained model is always the one backtested). Returns equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### XGBClassifier

In [None]:
# Train XGBClassifier classifier on train data
# No hyperparameter overrides are passed. The two returned values (bound to clf and
# scaler) are what the backtest cell below hands to qbacktest.
clf, scaler = train_classifier(XGBClassifier, data)

In [None]:
# Test on val data
# Uses whichever clf/scaler pair was trained most recently, so run this right after
# the training cell above. qbacktest returns three values: equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### XGBClassifier + HPO

In [None]:
# Train XGBClassifier classifier on train data, but optimize it with HPO first

# Build the matrix the hyperparameter search runs on: the first train_set_end
# fraction of the rows, standardized, and (when system.balance_data is truthy)
# oversampled with SMOTE.
X_train, y_train = get_clean_Xy(data.iloc[0:int(data.shape[0] * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
if system.balance_data:
    # Apply SMOTE oversampling to balance the training data
    sm = SMOTE(random_state=newseed())
    X_train, y_train = sm.fit_resample(X_train, y_train)

# Search space (hp.quniform with q=1 samples whole-number *floats*, e.g. 7.0).
xgb_space = {
    "max_depth": hp.quniform("max_depth", 2, 12, 1),
    "learning_rate": hp.uniform("learning_rate", 0.001, 0.2),
    "n_estimators": hp.quniform("n_estimators", 5, 1000, 1),
    "min_child_weight": hp.quniform("min_child_weight", 1, 10, 1),
    "gamma": hp.uniform("gamma", 0, 1),
    "subsample": hp.uniform("subsample", 0.1, 1),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.1, 1),
    "reg_alpha": hp.uniform("reg_alpha", 0, 1),
    "reg_lambda": hp.uniform("reg_lambda", 0, 1),
}
best_hyperparams = optimize_model(XGBClassifier, 'XGBClassifier', xgb_space,
                                  X_train, y_train, max_evals=100)

# max_depth and n_estimators must be integers for the final model; coerce them in
# case the search returned the raw quniform floats. This is a no-op when
# optimize_model already hands back ints.
for _int_key in ("max_depth", "n_estimators"):
    if _int_key in best_hyperparams:
        best_hyperparams[_int_key] = int(round(best_hyperparams[_int_key]))

# Final fit with the tuned settings; this rebinds `scaler` to the one returned by
# train_classifier, replacing the scaler fitted above for the search.
clf, scaler = train_classifier(XGBClassifier, data, **best_hyperparams)

In [None]:
# Test on val data
# clf/scaler here are the HPO-tuned pair from the cell above (the most recently
# trained model is always the one backtested). Returns equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### LGBMClassifier

In [None]:
# Train LGBMClassifier classifier on train data
# No hyperparameter overrides are passed. The two returned values (bound to clf and
# scaler) are what the backtest cell below hands to qbacktest.
clf, scaler = train_classifier(LGBMClassifier, data)

In [None]:
# Test on val data
# Uses whichever clf/scaler pair was trained most recently, so run this right after
# the training cell above. qbacktest returns three values: equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### LGBMClassifier + HPO

In [None]:
# Train LGBMClassifier classifier on train data, but optimize it with HPO first

# Build the matrix the hyperparameter search runs on: the first train_set_end
# fraction of the rows, standardized, and (when system.balance_data is truthy)
# oversampled with SMOTE.
X_train, y_train = get_clean_Xy(data.iloc[0:int(data.shape[0] * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
if system.balance_data:
    # Apply SMOTE oversampling to balance the training data
    sm = SMOTE(random_state=newseed())
    X_train, y_train = sm.fit_resample(X_train, y_train)

# Search space (hp.quniform with q=1 samples whole-number *floats*, e.g. 31.0).
lgbm_space = {
    "num_leaves": hp.quniform("num_leaves", 10, 150, 1),
    "max_depth": hp.quniform("max_depth", 5, 50, 1),
    "learning_rate": hp.uniform("learning_rate", 0.01, 0.2),
    "n_estimators": hp.quniform("n_estimators", 50, 1000, 1),
    "min_split_gain": hp.uniform("min_split_gain", 0.0, 1.0),
    "min_child_weight": hp.uniform("min_child_weight", 0.001, 0.1),
    "min_child_samples": hp.quniform("min_child_samples", 5, 100, 1),
    "subsample": hp.uniform("subsample", 0.5, 1),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1),
    "reg_alpha": hp.uniform("reg_alpha", 0, 1),
    "reg_lambda": hp.uniform("reg_lambda", 0, 1),
}
best_hyperparams = optimize_model(LGBMClassifier, 'LGBMClassifier', lgbm_space,
                                  X_train, y_train, max_evals=10)

# The count-like parameters must be integers for the final model; coerce them in
# case the search returned the raw quniform floats. This is a no-op when
# optimize_model already hands back ints.
for _int_key in ("num_leaves", "max_depth", "n_estimators", "min_child_samples"):
    if _int_key in best_hyperparams:
        best_hyperparams[_int_key] = int(round(best_hyperparams[_int_key]))

# Final fit with the tuned settings; this rebinds `scaler` to the one returned by
# train_classifier, replacing the scaler fitted above for the search.
clf, scaler = train_classifier(LGBMClassifier, data, **best_hyperparams)

In [None]:
# Test on val data
# clf/scaler here are the HPO-tuned pair from the cell above (the most recently
# trained model is always the one backtested). Returns equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### RandomForestClassifier

In [None]:
# Train RandomForestClassifier classifier on train data
# n_jobs=-1 is passed as an extra keyword (presumably forwarded to the estimator,
# where scikit-learn interprets -1 as "use all CPU cores" — confirm in train_classifier).
# The two returned values are what the backtest cell below hands to qbacktest.
clf, scaler = train_classifier(RandomForestClassifier, data, n_jobs=-1)

In [None]:
# Test on val data
# Uses whichever clf/scaler pair was trained most recently, so run this right after
# the training cell above. qbacktest returns three values: equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### RandomForestClassifier + HPO

In [None]:
# Train RandomForestClassifier classifier on train data, but optimize it with HPO first

# Build the matrix the hyperparameter search runs on: the first train_set_end
# fraction of the rows, standardized, and (when system.balance_data is truthy)
# oversampled with SMOTE.
X_train, y_train = get_clean_Xy(data.iloc[0:int(data.shape[0] * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
if system.balance_data:
    # Apply SMOTE oversampling to balance the training data
    sm = SMOTE(random_state=newseed())
    X_train, y_train = sm.fit_resample(X_train, y_train)

# Search space: forest size and maximum tree depth.
rf_space = {
    "n_estimators": hp.choice("n_estimators", range(5, 201)),
    "max_depth": hp.choice("max_depth", range(2, 21)),
}
best_hyperparams = optimize_model(RandomForestClassifier, 'RandomForestClassifier', rf_space,
                                  X_train, y_train, max_evals=10,
                                  n_jobs=-1)

# n_jobs is an extra keyword, not a searched hyperparameter, so it has to be added
# back for the final fit — otherwise the tuned forest is trained without the n_jobs=-1
# that both the search and the baseline RandomForest cell use.
best_hyperparams['n_jobs'] = -1

# Final fit with the tuned settings; this rebinds `scaler` to the one returned by
# train_classifier, replacing the scaler fitted above for the search.
clf, scaler = train_classifier(RandomForestClassifier, data, **best_hyperparams)

In [None]:
# Test on val data
# clf/scaler here are the HPO-tuned pair from the cell above (the most recently
# trained model is always the one backtested). Returns equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### ExtraTreesClassifier

In [None]:
# Train ExtraTreesClassifier classifier on train data
# No hyperparameter overrides are passed. The two returned values (bound to clf and
# scaler) are what the backtest cell below hands to qbacktest.
clf, scaler = train_classifier(ExtraTreesClassifier, data)

In [None]:
# Test on val data
# Uses whichever clf/scaler pair was trained most recently, so run this right after
# the training cell above. qbacktest returns three values: equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### ExtraTreesClassifier + HPO

In [None]:
# Train ExtraTreesClassifier classifier on train data, but optimize it with HPO first

# Build the matrix the hyperparameter search runs on: the first train_set_end
# fraction of the rows, standardized, and (when system.balance_data is truthy)
# oversampled with SMOTE.
X_train, y_train = get_clean_Xy(data.iloc[0:int(data.shape[0] * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
if system.balance_data:
    # Apply SMOTE oversampling to balance the training data
    sm = SMOTE(random_state=newseed())
    X_train, y_train = sm.fit_resample(X_train, y_train)

# Search space: ensemble size and maximum tree depth.
et_space = {
    "n_estimators": hp.choice("n_estimators", range(5, 201)),
    "max_depth": hp.choice("max_depth", range(2, 21)),
}
best_hyperparams = optimize_model(ExtraTreesClassifier, 'ExtraTreesClassifier', et_space,
                                  X_train, y_train, max_evals=10,
                                  n_jobs=-1)

# n_jobs is an extra keyword, not a searched hyperparameter, so it has to be added
# back for the final fit — otherwise the tuned model is trained without the n_jobs=-1
# that the search itself was given.
best_hyperparams['n_jobs'] = -1

# Final fit with the tuned settings; this rebinds `scaler` to the one returned by
# train_classifier, replacing the scaler fitted above for the search.
clf, scaler = train_classifier(ExtraTreesClassifier, data, **best_hyperparams)

In [None]:
# Test on val data
# clf/scaler here are the HPO-tuned pair from the cell above (the most recently
# trained model is always the one backtested). Returns equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### CatBoostClassifier

In [None]:
# Train CatBoostClassifier classifier on train data
# verbose=False is passed as an extra keyword (presumably forwarded to CatBoostClassifier
# to silence its training log — confirm in train_classifier).
# The two returned values are what the backtest cell below hands to qbacktest.
clf, scaler = train_classifier(CatBoostClassifier, data, verbose=False)

In [None]:
# Test on val data
# Uses whichever clf/scaler pair was trained most recently, so run this right after
# the training cell above. qbacktest returns three values: equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### CatBoostClassifier + HPO

In [None]:
# Train CatBoostClassifier classifier on train data, but optimize it with HPO first

# Build the matrix the hyperparameter search runs on: the first train_set_end
# fraction of the rows, standardized, and (when system.balance_data is truthy)
# oversampled with SMOTE.
X_train, y_train = get_clean_Xy(data.iloc[0:int(data.shape[0] * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
if system.balance_data:
    # Apply SMOTE oversampling to balance the training data
    sm = SMOTE(random_state=newseed())
    X_train, y_train = sm.fit_resample(X_train, y_train)

# Search space (hp.quniform with q=1 samples whole-number *floats*, e.g. 6.0).
catboost_space = {
    "iterations": hp.quniform("iterations", 50, 1200, 1),
    "learning_rate": hp.uniform("learning_rate", 0.01, 0.2),
    "depth": hp.quniform("depth", 4, 10, 1),
    "l2_leaf_reg": hp.uniform("l2_leaf_reg", 1, 10),
    "border_count": hp.quniform("border_count", 128, 256, 1),
    "random_strength": hp.uniform("random_strength", 0.5, 2),
    "bagging_temperature": hp.uniform("bagging_temperature", 0.5, 2),
}
best_hyperparams = optimize_model(CatBoostClassifier, 'CatBoostClassifier', catboost_space,
                                  X_train, y_train, max_evals=10, verbose=False)

# iterations, depth and border_count must be integers for the final model; coerce
# them in case the search returned the raw quniform floats. This is a no-op when
# optimize_model already hands back ints.
for _int_key in ("iterations", "depth", "border_count"):
    if _int_key in best_hyperparams:
        best_hyperparams[_int_key] = int(round(best_hyperparams[_int_key]))

# verbose is an extra keyword, not a searched hyperparameter, so add it back for
# the final fit.
best_hyperparams['verbose'] = False

# Final fit with the tuned settings; this rebinds `scaler` to the one returned by
# train_classifier, replacing the scaler fitted above for the search.
clf, scaler = train_classifier(CatBoostClassifier, data, **best_hyperparams)

In [None]:
# Test on val data
# clf/scaler here are the HPO-tuned pair from the cell above (the most recently
# trained model is always the one backtested). Returns equity, pf and trades.
# NOTE(review): the validation rows are presumably selected inside qbacktest from
# system.train_set_end / system.val_set_end — confirm there.
equity, pf, trades = qbacktest(clf, scaler, data)

In [None]:
# Show the head of `trades` (third value returned by qbacktest above) as the cell output.
trades.head()

### ------------------------------------------------------------------------------------------------------------