### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

import numpy as np

# Bootstrap for hosted runs: when the COLAB_GPU environment variable is present
# (presumably only on Google Colab), fetch the project and install its dependencies.
if 'COLAB_GPU' in os.environ:
    # clone the PickStocks repository into ./PickStocks
    !git clone https://github.com/impulsecorp/PickStocks.git
    # move its Python modules and data directory into the working directory so they can be imported/loaded
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    # install (upgrading where needed, quietly) the packages listed in the project's requirements file
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
warnings.filterwarnings("ignore")
import system
from system import *
# Small hack to prevent a Colab error: if the first import of datablock fails,
# try it once more (apparently the retry succeeds there).
try:
    from datablock import *
except Exception:
    # Only ordinary errors trigger the retry; KeyboardInterrupt/SystemExit are
    # no longer swallowed. A failure of this second attempt propagates.
    from datablock import *
# Echo `seed` (expected to come from one of the star imports above) as the cell output.
seed

In [None]:
# Render matplotlib figures inline; %pylab also pulls numpy and matplotlib names into the notebook namespace.
%pylab inline

### Setup

In [None]:
# Bar period of the price history to load.
data_timeperiod = '15min'
# Fetch the SPY history (nrows=None, presumably meaning no row limit) and feed it
# straight into procdata_lite, which produces the frame used by the rest of the notebook.
data = procdata_lite(get_data('SPY', period=data_timeperiod, nrows=None))

In [None]:
# for inspection: print the shape of the processed data and show its first rows
print(data.shape)
data.head()

In [None]:
# Split points, expressed as fractions of the data, that lay out the train / validation / test sets.
system.train_set_end = 0.5 # fraction of the data at which the training set ends (1.0 means all data is training set)
system.val_set_end = 0.75 # fraction of the data at which the validation set ends (1.0 means no test set)
# The two values above act like sliders over the data and give the following layout:
# [|0.0| ...... train ............. |0.5| ............ val ............ |0.75| .............. test ............... |1.0|]

### ------------------------------------------------------------------------------------------------------------

### Base test

In [None]:
# Feature columns are the ones whose label contains 'X'; featdeformat turns each
# column label into the feature name used below (featformat maps it back to the column).
feature_names = list(map(featdeformat, data.filter(like='X')))
feature_names

In [None]:
# (min, max) of every feature column, in the same order as feature_names.
feature_ranges = [
    (np.min(column), np.max(column))
    for column in (data[featformat(name)].values for name in feature_names)
]
feature_ranges

In [None]:
# Bin edges for each feature: num_bins evenly spaced edges spanning the feature's
# [min, max] range, i.e. num_bins - 1 consecutive bins per feature.
num_bins = 10 + 1
feat_bins = [np.linspace(low, high, num_bins) for low, high in feature_ranges]

In [None]:
# Train classifier on train data
# NOTE(review): the full `data` frame is passed in; presumably train_classifier restricts
# itself to the training split via system.train_set_end — confirm.
clf = train_classifier(LogisticRegression, data)

In [None]:
# Backtest the trained classifier; `trades` is the trade list that the bin search below filters.
# NOTE(review): no skip_val/skip_test arguments are given here, unlike the unseen-data run
# later in the notebook — presumably the defaults restrict this run to the training split; confirm.
equity, pf, trades = qbacktest(clf, data)

In [None]:
# preview the first rows of the base backtest's trade list
trades.head()

### Search for the best bins for each feature

In [None]:
# For every feature, restrict the base strategy's trades to each of the feature's
# bins in turn and record the resulting profit factor, trade count and winner percentage.
pf_matrix, nt_matrix, wn_matrix = [], [], []
for fname, bins in zip(tqdm(feature_names), feat_bins):
    pfs, nts, wns = [], [], []
    # walk over adjacent bin edges: (bins[0], bins[1]), (bins[1], bins[2]), ...
    for lower, upper in zip(bins[:-1], bins[1:]):
        in_bin = filter_trades_by_feature(trades, data, featformat(fname), min_value=lower, max_value=upper)
        pf, ntrades = compute_stats(data, in_bin)
        pfs.append(pf)
        nts.append(len(ntrades))
        wns.append(get_winner_pct(ntrades))
    pf_matrix.append(pfs)
    nt_matrix.append(nts)
    wn_matrix.append(wns)
# rows = features (order of feature_names), columns = bins (column j spans bins[j]..bins[j+1])
pf_matrix, nt_matrix, wn_matrix = np.array(pf_matrix), np.array(nt_matrix), np.array(wn_matrix)

### Compute the PF matrix, take the top N

In [None]:
# Rank every (feature, bin) cell of pf_matrix from highest to lowest profit factor.
# NOTE(review): np.argsort places NaN last, so after the reversal any NaN profit
# factors come first — confirm how compute_stats reports bins without trades.
ranking = np.argsort(pf_matrix.ravel())[::-1]
# one (row, col) pair per cell, best first; row = feature index, col = bin index
sorted_pfmatrix = np.column_stack(np.unravel_index(ranking, pf_matrix.shape))
N = 30
# the top N PFs here; slicing (rather than indexing 0..N-1) keeps this working
# when the matrix holds fewer than N cells
top_coords = [(row, col) for row, col in sorted_pfmatrix[:N]]
top_pfs = [pf_matrix[rc] for rc in top_coords]
top_nts = [nt_matrix[rc] for rc in top_coords]
top_wns = [wn_matrix[rc] for rc in top_coords]

# show (profit factor, trade count, winner %) of the selected cells
list(zip(top_pfs, top_nts, top_wns))

### Combine all good strategies with OR into one big strategy and check the performance on training data

In [None]:
# this is done by simply merging all trade lists and then removing the duplicate trades
alltrades = []
for r, c in top_coords:
    # Column c of pf_matrix was measured on the bin feat_bins[r][c] .. feat_bins[r][c + 1]
    # (the search loop stored the bin bins[i-1]..bins[i] at index i-1), so the same
    # pair of edges must be used here; c-1..c would select the neighbouring bin and,
    # for c == 0, wrap around to an inverted (max, min) range.
    selected = filter_trades_by_feature(
        trades, data, featformat(feature_names[r]),
        min_value=feat_bins[r][c], max_value=feat_bins[r][c + 1],
    )
    pf, mtrades = compute_stats(data, selected)
    alltrades.append(mtrades)
# OR-combination of the selected bins: concatenate, then drop trades picked by more than one bin
alltrades = pd.concat(alltrades).drop_duplicates().reset_index(drop=True)
# cumulative profit of the combined strategy
plt.plot(alltrades['profit'].cumsum());

In [None]:
# preview the first rows of the merged, de-duplicated trade list
alltrades.head()

### ------------------------------------------------------------------------------------------------------------

### Test the strategy on unseen data

In [None]:
# Base test without the filter
# This rebinds `trades`, so the filtering cell that follows works on this run's trades.
# NOTE(review): presumably skip_val=1 leaves out the validation split and skip_test=0
# includes the test split — confirm against qbacktest.
equity, pf, trades = qbacktest(clf, data, skip_val=1, skip_test=0)

In [None]:
# Test with the filter
alltrades = []
for r, c in top_coords:
    # Column c of pf_matrix was measured on the bin feat_bins[r][c] .. feat_bins[r][c + 1]
    # (the search loop stored the bin bins[i-1]..bins[i] at index i-1), so the same
    # pair of edges must be used here; c-1..c would select the neighbouring bin and,
    # for c == 0, wrap around to an inverted (max, min) range.
    selected = filter_trades_by_feature(
        trades, data, featformat(feature_names[r]),
        min_value=feat_bins[r][c], max_value=feat_bins[r][c + 1],
    )
    pf, mtrades = compute_stats(data, selected)
    alltrades.append(mtrades)
# OR-combination of the selected bins: concatenate, then drop trades picked by more than one bin
alltrades = pd.concat(alltrades).drop_duplicates().reset_index(drop=True)
# profit factor of the combined, filtered trade list
pf = get_profit_factor(alltrades)
plt.plot(alltrades['profit'].cumsum());
print(f'Profit factor: {pf:.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

In [None]:
# preview the first rows of the filtered trade list from the unseen-data run
alltrades.head()

### ------------------------------------------------------------------------------------------------------------