### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

import numpy as np

# Bootstrap step that only runs when the COLAB_GPU environment variable is
# present (used here as the signal that the notebook runs on Google Colab).
if 'COLAB_GPU' in os.environ:
    # Fetch the project sources.
    !git clone https://github.com/impulsecorp/PickStocks.git
    # Move the project's Python modules and its data directory into the
    # current working directory so the imports below can find them.
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    # Install/upgrade the project's requirements quietly.
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
# Silence all warnings for the rest of the session to keep cell output readable.
warnings.filterwarnings("ignore")
# `system` is imported both as a module (attributes are set on it later, e.g.
# system.train_set_end) and via star-import for its helper functions.
import system
from system import *
# Small hack to prevent a Colab error: the star-import of datablock is simply
# retried once if the first attempt fails.
try:
    from datablock import *
except Exception:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed by the retry.
    from datablock import *
# Bare expression: shows the value of `seed` as the cell output
# (presumably provided by one of the star-imports above - confirm).
seed

In [None]:
# IPython magic: enables inline matplotlib figures and star-imports the
# numpy / matplotlib (pylab) names into the interactive namespace.
# NOTE(review): because it runs after `from system import *`, pylab names can
# shadow names imported earlier (pylab exports e.g. `seed`) - confirm this is
# harmless, or consider `%matplotlib inline` with explicit imports instead.
%pylab inline

### Setup

In [None]:
# Bar period of the price data used throughout the notebook.
data_timeperiod = '15min'
# Load the SPY data (nrows=None: no row limit requested) and pass it straight
# through procdata_lite to obtain the working frame `data`.
data = procdata_lite(
    get_data('SPY', period=data_timeperiod, nrows=None)
)

In [None]:
# For inspection: print the frame's dimensions and display its first rows.
print(data.shape)
data.head()

In [None]:
# Train/validation/test split points, stored as attributes on the `system`
# module and expressed as fractions of the data (0.0 .. 1.0).
system.train_set_end = 0.5 # fraction of the data at which the training set ends (1.0 means all data is training set)
system.val_set_end = 0.75 # fraction of the data at which the validation set ends (1.0 means no test set)
# The two values above act like sliders that determine the layout of the data:
# [|0.0| ...... train ............. |0.5| ............ val ............ |0.75| .............. test ............... |1.0|]

### ------------------------------------------------------------------------------------------------------------

### Base test

In [None]:
# Every column whose label contains 'X' is treated as a feature column; the
# labels are converted with featdeformat before being stored.
feature_names = [featdeformat(column) for column in data.filter(like='X').columns]

In [None]:
# One (min, max) pair per feature, in the same order as feature_names.
feature_ranges = [
    (np.min(column_values), np.max(column_values))
    for column_values in (data[featformat(name)].values for name in feature_names)
]

In [None]:
# Bin edges for each feature: num_bins evenly spaced edge values spanning the
# feature's [min, max] range (11 edges delimit 10 intervals).
num_bins = 10 + 1
# Row k of the resulting 2-D array holds the edges of feature k.
feat_bins = np.array(
    [np.linspace(low, high, num_bins) for low, high in feature_ranges]
)

In [None]:
# Train classifier on train data.
# The classifier class (LogisticRegression) is passed uninstantiated together
# with the full frame; presumably train_classifier applies the train split
# configured on `system` itself - confirm.
clf = train_classifier(LogisticRegression, data)

In [None]:
# Baseline backtest of the unfiltered classifier. qbacktest returns three
# values, unpacked here as an equity result, a profit factor and the trade
# list; `base_trades` is the input to the per-bin filtering further down.
equity, pf, base_trades = qbacktest(clf, data)

In [None]:
# Display the first baseline trades for a quick sanity check.
base_trades.head()

### Search for the best bins for each feature

In [None]:
# Evaluate the baseline trades restricted to every single (feature, bin) pair.
# Results are kept as flat, parallel lists: entry k of each list describes the
# same (feature, bin) pair.
pf_matrix = []  # profit factor returned by compute_stats for the bin
nt_matrix = []  # number of trades remaining in the bin
wn_matrix = []  # get_winner_pct of those trades

# (row_idx, col_idx) of each recorded entry: row_idx indexes feature_names /
# feat_bins, col_idx is the position of the bin's upper edge in that row.
coords = []

for row_idx, (fname, bins) in enumerate(zip(tqdm(feature_names), feat_bins)):
    column = featformat(fname)
    for col_idx in range(1, len(bins)):
        # Order the two adjacent edges so that min_value <= max_value.
        lower, upper = sorted((bins[col_idx - 1], bins[col_idx]))
        in_bin = filter_trades_by_feature(base_trades, data, column, min_value=lower, max_value=upper)
        # Use bin-local names so the baseline `pf` from the backtest cell is
        # not overwritten by this loop.
        bin_pf, bin_trades = compute_stats(data, in_bin)
        # Skip bins without trades and bins whose profit factor is -1
        # (presumably compute_stats' "undefined" sentinel - confirm).
        if (bin_pf != -1) and (len(bin_trades) > 0):
            pf_matrix.append(bin_pf)
            nt_matrix.append(len(bin_trades))
            wn_matrix.append(get_winner_pct(bin_trades))
            coords.append((row_idx, col_idx))

### Compute the PF matrix, take the top N

In [None]:
# Join the per-bin results into (pf, n_trades, winner_pct, coords) records and
# order them by descending winner percentage (record index 2).
# NOTE(review): the section heading talks about the PF matrix, yet the ranking
# key is the winner percentage; index 0 would rank by profit factor - confirm
# which ordering is intended.
zpd = sorted(
    zip(pf_matrix, nt_matrix, wn_matrix, coords),
    key=lambda record: record[2],
    reverse=True,
)

In [None]:
# Selection thresholds for the ranked bins.
N = 20             # keep at most this many bins
min_pf = 1.0       # minimum profit factor a bin must reach
min_trades = 50    # lower bound on the number of trades in a bin
max_trades = 1000  # upper bound on the number of trades in a bin
# The first N entries of zpd (in its ranked order) that pass the thresholds.
top_pfs = []
top_nts = []
top_wns = []
top_coords = []
# Loop targets use bin_* names on purpose: reusing `coords` / `pf` here would
# overwrite the notebook-level `coords` list, so re-running the sorting cell
# afterwards would silently zip against a single (row, col) tuple.
for bin_pf, bin_nt, bin_wn, bin_coord in zpd:
    if (min_trades <= bin_nt <= max_trades) and (bin_pf >= min_pf):
        top_pfs.append(bin_pf)
        top_nts.append(bin_nt)
        top_wns.append(bin_wn)
        top_coords.append(bin_coord)
        if len(top_coords) >= N:
            break
# Last expression of the cell: rendered as a table of the selected bins.
pd.DataFrame(data=list(zip(top_pfs, top_nts, top_wns)), columns=['PF', 'Trades', ' % Winners'])

In [None]:
# Description of the discovered best bins - the feature names and the bin boundaries.
# Only the names and edges are printed, so no trades are re-filtered here
# (the previous per-bin compute_stats result was never used).
for r, c in top_coords:
    print(feature_names[r], f'[{feat_bins[r,c-1]:.5f} .. {feat_bins[r,c]:.5f}]')

### Combine all good strategies with OR into one big strategy and check the performance on training data

In [None]:
# The combined ("OR") strategy is built by simply merging the trade lists of
# all selected bins and then removing the duplicate trades.
if not top_coords:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate".
    raise ValueError('No bins were selected - relax min_pf / min_trades / max_trades')
per_bin_trades = []
for r, c in top_coords:
    in_bin = filter_trades_by_feature(base_trades, data, featformat(feature_names[r]), min_value=feat_bins[r, c-1], max_value=feat_bins[r, c])
    _, mtrades = compute_stats(data, in_bin)
    per_bin_trades.append(mtrades)
# NOTE(review): drop_duplicates() compares column values only (the index is
# ignored); this assumes a trade is uniquely identified by its column values
# - confirm against the trade frame's schema.
alltrades = pd.concat(per_bin_trades, axis=0).drop_duplicates().sort_index()
# Cumulative profit of the merged trades; the trailing ';' suppresses the
# plot call's return value in the cell output.
plt.plot(alltrades['profit'].cumsum());
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

In [None]:
# Display the first rows of the merged, de-duplicated trade list.
alltrades.head()

### ------------------------------------------------------------------------------------------------------------

### Test the strategy on unseen data

In [None]:
# Base test without the filter.
# Only the trade list is kept (the second return value is discarded).
# NOTE(review): skip_val=1 / skip_test=0 presumably exclude the validation
# segment and include the test segment; whether the training segment is also
# excluded depends on qbacktest's defaults - confirm.
equity, _, test_trades = qbacktest(clf, data, skip_val=1, skip_test=0)

In [None]:
# Test with the filter: apply every selected (feature, bin) filter to the
# test-period trades, merge the results and remove the duplicate trades.
if not top_coords:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate".
    raise ValueError('No bins were selected - relax min_pf / min_trades / max_trades')
per_bin_trades = []
for r, c in top_coords:
    in_bin = filter_trades_by_feature(test_trades, data, featformat(feature_names[r]), min_value=feat_bins[r, c-1], max_value=feat_bins[r, c])
    _, mtrades = compute_stats(data, in_bin)
    per_bin_trades.append(mtrades)
# NOTE(review): drop_duplicates() compares column values only (the index is
# ignored); this assumes a trade is uniquely identified by its column values
# - confirm against the trade frame's schema.
alltrades = pd.concat(per_bin_trades, axis=0).drop_duplicates().sort_index()
# Cumulative profit of the filtered test trades; the trailing ';' suppresses
# the plot call's return value in the cell output.
plt.plot(alltrades['profit'].cumsum());
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

In [None]:
# Display the first 20 rows of the filtered test-period trades.
alltrades[0:20]

### ------------------------------------------------------------------------------------------------------------