### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

# Bootstrap for hosted runs: the block only executes when the COLAB_GPU
# environment variable is present (presumably set by Google Colab — confirm).
if 'COLAB_GPU' in os.environ:
    # Clone the project, move its Python modules and data directory next to
    # the notebook, then install the requirements listed in the repository.
    !git clone https://github.com/impulsecorp/PickStocks.git
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
warnings.filterwarnings("ignore")
import system
from system import *
# Small hack to prevent a Colab error: the first import of `datablock` can
# fail there, and simply retrying it succeeds. Catch Exception rather than
# using a bare except so KeyboardInterrupt/SystemExit are not swallowed.
try:
    from datablock import *
except Exception:
    from datablock import *
# Display the `seed` value brought in by the star imports above.
seed

In [None]:
# Enable inline figure rendering for the notebook.
# NOTE(review): IPython discourages %pylab because it star-imports numpy and
# matplotlib names into the namespace; consider `%matplotlib inline` plus
# explicit imports once it is confirmed nothing relies on those names.
%pylab inline

### Setup

In [None]:
# Load the full SPY history at the 'D' period (presumably daily bars — confirm
# against get_data) and hand it straight to procdata_lite for preprocessing.
data_timeperiod = 'D'
data = procdata_lite(get_data('SPY', period=data_timeperiod, nrows=None))

In [None]:
# For inspection: print the dimensions of the processed data, then display its first rows.
print(data.shape)
data.head()

In [None]:
# Set the `regression` flag on the shared `system` module (presumably selects
# regression mode — every model cell below trains a regressor).
system.regression = 1 # don't change this

In [None]:
# Split and trading settings stored on the shared `system` module.
# The *_set_end values are fractions of the row count (the HPO cells slice the
# training rows as int(n_rows * system.train_set_end)).
system.train_set_end = 0.7 # fraction of rows at which the training set ends (1.0 means all data is training set)
system.val_set_end = 1.0    # fraction of rows at which the validation set ends (1.0 means no test set)
system.regression_move_threshold = 2.5 # emits a trade order if the predicted move is bigger than this (units not visible here)

### ------------------------------------------------------------------------------------------------------------

### LinearRegression

In [None]:
# Train LinearRegression on train data.
# train_regressor returns a (reg, scaler) pair; both are passed to rbacktest in the next cell.
reg, scaler = train_regressor(LinearRegression, data)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### LinearRegression + HPO

In [None]:
# Tune LinearRegression with hyperopt on the training rows, then refit it
# through train_regressor using the best settings found.
# NOTE(review): hyperopt's fmin reports hp.choice picks as indices — confirm
# optimize_model maps them back to the actual values.

X_train, y_train = get_clean_Xy(data.iloc[:int(len(data) * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
best_hyperparams = optimize_model(
    LinearRegression,
    'LinearRegression',
    {"fit_intercept": hp.choice("fit_intercept", (True, False))},
    X_train,
    y_train,
    max_evals=100,
)
reg, scaler = train_regressor(LinearRegression, data, **best_hyperparams)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### XGBRegressor

In [None]:
# Train XGBRegressor on train data.
# train_regressor returns a (reg, scaler) pair; both are passed to rbacktest in the next cell.
reg, scaler = train_regressor(XGBRegressor, data)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### XGBRegressor + HPO

In [None]:
# Tune XGBRegressor with hyperopt on the training rows, then refit it through
# train_regressor using the best settings found.
# NOTE(review): hyperopt's fmin reports hp.choice picks as indices — confirm
# optimize_model maps them back to the actual values.

X_train, y_train = get_clean_Xy(data.iloc[:int(len(data) * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
best_hyperparams = optimize_model(
    XGBRegressor,
    'XGBRegressor',
    {
        "n_estimators": hp.choice("n_estimators", range(5, 201)),
        "learning_rate": hp.loguniform("learning_rate", -5, 0),
        "max_depth": hp.choice("max_depth", range(2, 11)),
    },
    X_train,
    y_train,
    max_evals=10,
)
reg, scaler = train_regressor(XGBRegressor, data, **best_hyperparams)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### LGBMRegressor

In [None]:
# Train LGBMRegressor on train data.
# train_regressor returns a (reg, scaler) pair; both are passed to rbacktest in the next cell.
reg, scaler = train_regressor(LGBMRegressor, data)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### LGBMRegressor + HPO

In [None]:
# Tune LGBMRegressor with hyperopt on the training rows, then refit it through
# train_regressor using the best settings found.
# NOTE(review): hyperopt's fmin reports hp.choice picks as indices — confirm
# optimize_model maps them back to the actual values.

X_train, y_train = get_clean_Xy(data.iloc[:int(len(data) * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
best_hyperparams = optimize_model(
    LGBMRegressor,
    'LGBMRegressor',
    {
        "n_estimators": hp.choice("n_estimators", range(5, 201)),
        "learning_rate": hp.loguniform("learning_rate", -5, 0),
        "max_depth": hp.choice("max_depth", range(2, 11)),
    },
    X_train,
    y_train,
    max_evals=100,
)
reg, scaler = train_regressor(LGBMRegressor, data, **best_hyperparams)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### RandomForestRegressor

In [None]:
# Train RandomForestRegressor on train data; n_jobs=-1 is passed as an extra keyword to train_regressor.
# train_regressor returns a (reg, scaler) pair; both are passed to rbacktest in the next cell.
reg, scaler = train_regressor(RandomForestRegressor, data, n_jobs=-1)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### RandomForestRegressor + HPO

In [None]:
# Train RandomForestRegressor on train data, but optimize it with HPO first.
# NOTE(review): hyperopt's fmin reports hp.choice picks as indices — confirm
# optimize_model maps them back to the actual values.

X_train, y_train = get_clean_Xy(data.iloc[0:int(data.shape[0] * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
best_hyperparams = optimize_model(RandomForestRegressor, 'RandomForestRegressor',
                                  {"n_estimators": hp.choice("n_estimators", range(5, 201)),
                                   "max_depth": hp.choice("max_depth", range(2, 21))},
                                  X_train, y_train, max_evals=10, n_jobs=-1)
# Keep the final refit parallel, matching the search above and the baseline
# RandomForestRegressor cell. Merging through a dict avoids a duplicate-keyword
# error if best_hyperparams already carries n_jobs (its value then wins).
reg, scaler = train_regressor(RandomForestRegressor, data, **{'n_jobs': -1, **best_hyperparams})

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### ExtraTreesRegressor

In [None]:
# Train ExtraTreesRegressor on train data.
# train_regressor returns a (reg, scaler) pair; both are passed to rbacktest in the next cell.
reg, scaler = train_regressor(ExtraTreesRegressor, data)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### ExtraTreesRegressor + HPO

In [None]:
# Tune ExtraTreesRegressor with hyperopt on the training rows, then refit it
# through train_regressor using the best settings found.
# NOTE(review): hyperopt's fmin reports hp.choice picks as indices — confirm
# optimize_model maps them back to the actual values.

X_train, y_train = get_clean_Xy(data.iloc[:int(len(data) * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
best_hyperparams = optimize_model(
    ExtraTreesRegressor,
    'ExtraTreesRegressor',
    {
        "n_estimators": hp.choice("n_estimators", range(5, 201)),
        "max_depth": hp.choice("max_depth", range(2, 21)),
    },
    X_train,
    y_train,
    max_evals=50,
    n_jobs=-1,
)
reg, scaler = train_regressor(ExtraTreesRegressor, data, **best_hyperparams)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### ------------------------------------------------------------------------------------------------------------

### CatBoostRegressor

In [None]:
# Train CatBoostRegressor on train data; verbose=False is passed as an extra keyword to train_regressor.
# train_regressor returns a (reg, scaler) pair; both are passed to rbacktest in the next cell.
reg, scaler = train_regressor(CatBoostRegressor, data, verbose=False)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### CatBoostRegressor + HPO

In [None]:
# Train CatBoostRegressor on train data, but optimize it with HPO first

X_train, y_train = get_clean_Xy(data.iloc[0:int(data.shape[0] * system.train_set_end)])
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
best_hyperparams = optimize_model(CatBoostRegressor, 'CatBoostRegressor',
                                  {
                                    "iterations": hp.quniform("iterations", 50, 1200, 1),
                                    "learning_rate": hp.uniform("learning_rate", 0.01, 0.2),
                                    "depth": hp.quniform("depth", 4, 10, 1),
                                    "l2_leaf_reg": hp.uniform("l2_leaf_reg", 1, 10),
                                    "border_count": hp.quniform("border_count", 128, 256, 1),
                                    "random_strength": hp.uniform("random_strength", 0.5, 2),
                                    "bagging_temperature": hp.uniform("bagging_temperature", 0.5, 2),
                                },
                                  X_train, y_train, max_evals=10, verbose=False)
# hp.quniform samples are floats (e.g. 500.0) while these CatBoost parameters
# are integer-valued. Cast defensively so the refit receives ints whether or
# not optimize_model already converted them; keys that are absent are skipped.
best_hyperparams.update({
    name: int(round(best_hyperparams[name]))
    for name in ("iterations", "depth", "border_count")
    if name in best_hyperparams
})
# Silence CatBoost's per-iteration logging for the final fit as well.
best_hyperparams['verbose'] = False
reg, scaler = train_regressor(CatBoostRegressor, data, **best_hyperparams)

In [None]:
# Test on val data.
# rbacktest returns several values; the star-unpacking keeps only the last one as `trades`.
*_, trades = rbacktest(reg, scaler, data)

In [None]:
# Show the first rows of `trades` from the backtest above.
trades.head()

### ------------------------------------------------------------------------------------------------------------