### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

# Colab-only bootstrap: runs only when the COLAB_GPU environment variable is
# present (presumably set by the Google Colab runtime — confirm).
if 'COLAB_GPU' in os.environ:
    # Fetch the project repository into ./PickStocks.
    !git clone https://github.com/impulsecorp/PickStocks.git
    # Move the repo's Python modules and its data directory into the current
    # working directory (presumably so later cells can import/load them).
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    # Install/upgrade the packages listed in the repo's requirements file
    # (-qq keeps pip's output quiet).
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
# Silence all Python warnings for the rest of the notebook session.
warnings.filterwarnings("ignore")
import system
from system import *
# Small hack to prevent a Colab error: if the first star-import of
# `datablock` fails, it is simply attempted a second time.
try:
    from datablock import *
except Exception:
    # Only ordinary errors trigger the retry; KeyboardInterrupt/SystemExit are
    # no longer swallowed, so the cell can still be interrupted. A second
    # failure propagates normally.
    from datablock import *
# Display the value of `seed` (presumably provided by one of the star-imports
# above — confirm which module defines it).
seed

In [None]:
# Load the pylab environment with the inline plotting backend.
# NOTE(review): later cells use bare `np`, `plt` and `plot`; these presumably
# come from this magic (or from the star-imports in the previous cell) —
# confirm before replacing it with explicit imports.
%pylab inline

### Setup

In [None]:
# Bar period requested from get_data.
data_timeperiod = '15min'
# Fetch the SPY data at that period and pass it straight through
# procdata_lite; `data` holds the processed result.
data = procdata_lite(get_data('SPY', period=data_timeperiod, nrows=None))

In [None]:
# For inspection: print the shape of the processed data, then show its first rows.
print(data.shape)
data.head()

In [None]:
# Train/validation/test split boundaries, set as attributes on the `system`
# module and expressed as fractions of the data (0.0-1.0).
system.train_set_end = 0.5 # fraction of the data at which the training set ends (1.0 means all data is training set)
system.val_set_end = 0.75 # fraction of the data at which the validation set ends (1.0 means no test set)
# The two values above act like sliders that determine the layout of the data:
# [|0.0| ...... train ............. |0.5| ............ val ............ |0.75| .............. test ............... |1.0|]

### ------------------------------------------------------------------------------------------------------------

### Base test

In [None]:
# Train a LogisticRegression ensemble (ensemble_size=100) via train_ensemble.
# Per the original note this fits on the training portion of `data`
# (presumably the part ending at system.train_set_end — confirm in system.py).
clf = train_ensemble(LogisticRegression, data, ensemble_size=100)

In [None]:
# Backtest the trained ensemble on `data` with qbacktest's default settings.
# `trades` is used below as a table with a 'pred' column; `pf` is presumably
# the profit factor and `equity` the equity curve — confirm against qbacktest.
equity, pf, trades = qbacktest(clf, data)

In [None]:
# Preview the first rows of the trades produced by the base backtest.
trades.head()

### Search for best min_confidence

In [None]:
# Confidence is the distance of each prediction from 0.5, rescaled by 2
# (so a 'pred' of 0.0 or 1.0 maps to a confidence of 1.0).
pred_confidence = 2.0 * np.abs(0.5 - trades['pred'].values)
# Largest confidence observed; used as the upper bound of the search below.
max_conf_seen = pred_confidence.max()
max_conf_seen

In [None]:
# Sweep 100 evenly spaced min-confidence thresholds between 0 and the largest
# confidence seen, recording the profit factor and the number of surviving
# trades at each threshold.
levels = np.linspace(0.0, max_conf_seen, 100)
pfs, nts = [], []
for level in tqdm(levels):
    # Keep only the trades whose confidence passes this threshold.
    kept_trades = filter_trades_by_confidence(trades, min_conf=level)
    pf, ntrades = compute_stats(data, kept_trades)
    pfs.append(pf)
    nts.append(len(ntrades))

In [None]:
# Plot the search results: profit factor as a function of the confidence threshold.
fig, ax = plt.subplots()
ax.plot(levels, pfs)
ax.set_xlabel('Confidence')
ax.set_ylabel('Profit Factor');

In [None]:
# Tabulate the search results, one row per candidate confidence level.
# The table is built column by column so each column keeps its own dtype:
# stacking both lists into a single array upcasts the integer trade counts
# to floats.
res = pd.DataFrame({'num trades': nts, 'profit factor': pfs},
                   index=np.array(levels))
res

In [None]:
# Minimum-confidence threshold applied to the unseen-data test below.
# NOTE(review): hard-coded value, presumably picked by hand from the search
# results above; it is not recomputed if the data or the model changes.
best_min_confidence = 0.215

### Test on unseen data

In [None]:
# Base test without the confidence filter.
# NOTE(review): skip_val=1 / skip_test=0 presumably exclude the validation
# segment and include the held-out test segment — confirm against qbacktest.
# This rebinds `equity`, `pf` and `trades` from the earlier backtest.
equity, pf, trades = qbacktest(clf, data, skip_val=1, skip_test=0)

In [None]:
# Test with the filter: keep only the trades that pass the chosen
# min-confidence threshold, recompute the stats on them and print a summary.
confident_trades = filter_trades_by_confidence(trades, min_conf=best_min_confidence)
pf, ntrades = compute_stats(data, confident_trades)
print(f'Profit factor: {get_profit_factor(ntrades):.5f}, Winners: {get_winner_pct(ntrades):.2f}%, Trades: {len(ntrades)}')

In [None]:
# Cumulative profit of the confidence-filtered trades. The x-axis is whatever
# index `ntrades` carries. Uses the explicit `plt.` namespace and labels the
# figure so it can be read on its own.
plt.plot(ntrades['profit'].cumsum())
plt.ylabel('Cumulative profit')
plt.title('Filtered trades: cumulative profit');

In [None]:
# Show the first 20 of the confidence-filtered trades.
ntrades[0:20]

### ------------------------------------------------------------------------------------------------------------