### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

import numpy as np

# Colab bootstrap: only runs when the COLAB_GPU environment variable is present.
# It pulls the PickStocks repository and places its Python modules and data
# directory in the current working directory, presumably so the following
# cells can import them directly (e.g. `import system`).
if 'COLAB_GPU' in os.environ:
    !git clone https://github.com/impulsecorp/PickStocks.git
    # Move the modules and the data folder out of the clone, next to the notebook.
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    # requirements.txt is not moved above, so it is still read from the clone directory.
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings

# Silence every warning category so library noise does not clutter the cell outputs.
warnings.filterwarnings("ignore")
import system
from system import *

# Small hack to prevent a Colab error: if the first star-import of `datablock`
# raises, the import is simply attempted a second time.
# Only ordinary exceptions trigger the retry; a bare `except:` would also
# swallow KeyboardInterrupt/SystemExit and make the cell hard to interrupt.
try:
    from datablock import *
except Exception:
    from datablock import *

# Display the value of `seed` as the cell output (the name is presumably
# provided by one of the star-imports above — confirm in system/datablock).
seed

In [None]:
# Enable inline plotting. NOTE(review): later cells call `plt.plot(...)` although
# `plt` is never imported explicitly in this notebook — presumably this magic
# is what puts it into the namespace; confirm before replacing the magic.
%pylab inline

### Setup

In [None]:
# Bar period requested from get_data ('D' presumably means daily bars — confirm in get_data).
data_timeperiod = 'D'

# Fetch the SPY series (nrows=None: no explicit row limit is passed) and feed
# it directly through procdata_lite to obtain the working data set.
data = procdata_lite(
    get_data('SPY', period=data_timeperiod, nrows=None)
)

In [None]:
# For inspection: print the dimensions of the processed data and show its first rows.
print(data.shape)
data.head()

In [None]:
# Split boundaries, expressed as fractions of the data set (values between 0 and 1).
system.train_set_end = 0.5 # fraction of the data at which the training set ends (1.0 means all data is training set)
system.val_set_end = 0.75 # fraction of the data at which the validation set ends (1.0 means no test set)
# NOTE(review): presumably 1 switches on class balancing / feature scaling inside `system` — confirm there.
system.balance_data = 1
system.scale_data = 1

# Passed to combined_trades() in the cells below to decide how the per-bin trade lists are merged.
combine_method = 'or' # 'or'/'and'

### ------------------------------------------------------------------------------------------------------------

### Base test

In [None]:
# Train an ensemble of 10 LogisticRegression models on the train data.
# The returned classifier and scaler are both handed to qbacktest() in the following cells.
clf, scaler = train_ensemble(LogisticRegression, data, ensemble_size=10)

In [None]:
# Backtest the trained ensemble with qbacktest's default settings.
# `base_trades` is the unfiltered trade list that the bin search below works on
# (`pf` is presumably the profit factor — confirm in qbacktest).
equity, pf, base_trades = qbacktest(clf, scaler, data)

In [None]:
# Show the first rows of the unfiltered base trades.
base_trades.head()

### Search for the best bins for each feature

In [None]:
# Search the feature/bin grid built from the base trades and keep the top 8 cells.
# top_coords holds (row, column) pairs that index feature_names / feat_bins in
# the cells below. NOTE(review): min_pf / min_trades / max_trades presumably
# bound the profit factor and trade count a bin needs to qualify — confirm in
# compute_feature_matrix.
top_coords, feature_names, feat_bins, df = compute_feature_matrix(
    data,
    base_trades,
    min_pf=1.0,
    min_trades=10,
    max_trades=10000,
    topn=8,
)

# Display the resulting table as the cell output.
df

In [None]:
# Description of the discovered best bins - the feature names and the bin boundaries.
# Each entry of top_coords is a (row, column) pair: the row selects the feature,
# and the bin spans from feat_bins[row][column-1] to feat_bins[row][column].
# The trades for each bin are not recomputed here: the previous version ran
# filter_trades_by_feature/compute_stats on every iteration and discarded the
# result, which only cost time without affecting what is printed.
for r, c in top_coords:
    lower, upper = feat_bins[r][c-1], feat_bins[r][c]
    print(feature_names[r], f'[{lower:.5f} .. {upper:.5f}]')

### Combine all good strategies into one big strategy and check the performance on training data

In [None]:
# Build one combined strategy from all top bins: collect the base trades that
# fall inside each selected feature bin, then merge the per-bin trade lists with
# combined_trades() according to `combine_method` (the original note describes
# this as merging all trade lists and removing the duplicate trades).
per_bin_trades = []
for r, c in top_coords:
    # Base trades whose feature value lies within this bin's boundaries.
    selected = filter_trades_by_feature(
        base_trades, data, featformat(feature_names[r]),
        min_value=feat_bins[r][c-1], max_value=feat_bins[r][c])
    # Only the second return value (the trades) is needed here.
    _, mtrades = compute_stats(data, selected)
    per_bin_trades.append(mtrades)
alltrades = combined_trades(per_bin_trades, combine_method=combine_method)

# Cumulative profit curve of the combined strategy, followed by its summary statistics.
plt.plot(alltrades['profit'].cumsum())
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

In [None]:
# Show the first rows of the combined trade list.
alltrades.head()

### ------------------------------------------------------------------------------------------------------------

### Test the strategy on unseen data

In [None]:
# Base test without the filter: rerun the backtest with skip_val=1 and skip_test=0.
# NOTE(review): presumably this skips the validation segment and evaluates on the
# test segment only — confirm the flag semantics in qbacktest.
# `test_trades` is the unfiltered trade list that the bin filters are applied to below.
equity, _, test_trades = qbacktest(clf, scaler, data, skip_val=1, skip_test=0)

In [None]:
# Test with the filter: apply the bins found earlier (top_coords / feat_bins)
# to the test trades and merge the per-bin results into a single trade list.
filtered_sets = []
for r, c in top_coords:
    feature = featformat(feature_names[r])
    lower, upper = feat_bins[r][c-1], feat_bins[r][c]
    in_bin = filter_trades_by_feature(test_trades, data, feature, min_value=lower, max_value=upper)
    # compute_stats returns a pair; only its second element (the trades) is kept.
    _, mtrades = compute_stats(data, in_bin)
    filtered_sets.append(mtrades)
alltrades = combined_trades(filtered_sets, combine_method=combine_method)

# Plot the cumulative profit of the filtered strategy and print its summary statistics.
cumulative_profit = alltrades['profit'].cumsum()
plt.plot(cumulative_profit)
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

In [None]:
# Show the first 20 entries of the combined, filtered test trades.
alltrades[0:20]

### ------------------------------------------------------------------------------------------------------------