### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

# Colab-only bootstrap: runs only when COLAB_GPU is present in the environment.
if 'COLAB_GPU' in os.environ:
    # Clone the project repository, then move its .py modules and its data
    # folder into the current working directory.
    !git clone https://github.com/impulsecorp/PickStocks.git
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    # Install/upgrade (-U) everything listed in the repo's requirements file, quietly (-qq).
    # NOTE(review): versions are not pinned here — confirm the requirements file pins them.
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
# Silence every warning before the heavier imports below run.
warnings.filterwarnings("ignore")
import system
from system import *
# Small hack to prevent a Colab error: if the first import of datablock fails,
# it is simply attempted a second time. Only ordinary exceptions trigger the
# retry, so KeyboardInterrupt / SystemExit are not swallowed by it.
try:
    from datablock import *
except Exception:
    from datablock import *
from backtesting import Backtest
from backtesting.lib import compute_stats
# Echo the value of `seed` as the cell output
# (presumably provided by one of the star imports above — confirm).
seed

In [None]:
# Pylab mode with inline figures: brings the numpy / matplotlib.pyplot names
# (such as the bare `plot` used in later cells) into the notebook namespace.
%pylab inline

### ------------------------------------------------------------------------------------------------------------

In [None]:
%%time

data_timeperiod = '5min'
data = get_data('SPY', period=data_timeperiod)
data = procdata_lite(data, use_tsfel=0)

In [None]:
# Optional feature listing, currently disabled:
# print(get_data_features(data))
# Print the shape of the processed data, then show its first rows as the cell output.
print(data.shape)
data.head()

In [None]:
# Split points are written as fractions of the data (0.0 - 1.0), set on the `system` module.
system.train_set_end = 0.4 # fraction of the data at which the training set ends (1.0 means all data is training set)
system.val_set_end = 0.7 # fraction of the data at which the validation set ends (1.0 means no test set)
# The two values above act like sliders that determine the train / validation / test layout:
# [|0.0| ...... train ........ |0.4| ............ val ............ |0.7| .............. test ............... |1.0|]

In [None]:
%%time

# mode is 'opt' because we'll attempt to find the best confidence value for this period
consts = dict(mode=['opt'], clf_class=[LogisticRegression], period=[data_timeperiod])
bt = Backtest(data, MLEnsembleStrategy, **btkw)
stats = bt.run(**{x:consts[x][0] for x in consts.keys()},
               num_clfs=50)
display(stats)

In [None]:
# Plot the backtest above; its stats are passed wrapped in a one-element list.
plot_result(bt, [stats])

In [None]:
# Keep the per-trade table of the optimisation run; later cells filter it by prediction confidence.
trades = stats['_trades']
# Show the first trades as the cell's rich output, matching the other .head() preview cells.
trades.head()

In [None]:
# Running sum of the PnL column of the unfiltered trade table, drawn as a line.
plot(stats['_trades']['PnL'].cumsum()); # base classifier-only curve

In [None]:
def filter_trades_by_prediction(trades, predictions, threshold=0.0):
    """Keep only the trades whose entry-bar prediction is confident enough.

    The prediction found at each trade's ``EntryBar`` position is rescaled
    with ``abs(p - 0.5) * 2`` and stored in a new ``PredictionValue`` column.
    Rows whose value is not strictly greater than ``threshold`` are dropped.

    Args:
        trades: DataFrame of trades with an ``EntryBar`` column of bar positions.
        predictions: object whose ``.df`` attribute supports positional ``.iloc`` lookup.
        threshold: exclusive lower bound applied to ``PredictionValue``.

    Returns:
        A filtered copy of ``trades`` carrying the extra ``PredictionValue``
        column; the DataFrame passed in is not modified.
    """
    # Work on a copy so the caller's table stays untouched.
    scored = trades.copy()

    # Raw predictions at the bars where the trades were opened.
    entry_bars = scored['EntryBar']
    raw_at_entry = predictions.df.iloc[entry_bars].values

    # Distance from the neutral 0.5 mark, rescaled by a factor of two.
    scored['PredictionValue'] = np.abs(raw_at_entry - 0.5) * 2.0

    # Strictly-greater comparison decides which rows survive.
    keep = scored['PredictionValue'] > threshold
    return scored.loc[keep]

In [None]:
from hyperopt import fmin, tpe, hp

def objective(threshold):
    """Hyperopt loss for a candidate confidence threshold: the negated win rate.

    The optimisation-run trades are filtered with ``threshold``, the backtest
    statistics are recomputed on the surviving trades, and the negated
    ``'Win Rate [%]'`` is returned so that minimising the loss maximises the
    win rate.

    Args:
        threshold: candidate value handed to ``filter_trades_by_prediction``.

    Returns:
        ``-win_rate`` for the filtered trades, or ``0.0`` when no trade
        survives the filter.
    """
    # Filter trades by the threshold
    filtered_trades = filter_trades_by_prediction(trades, stats._strategy.predictions, threshold=threshold)

    # With no trades left the win rate would be undefined (NaN), and a NaN loss
    # can derail the search. Report the worst attainable loss instead, which is
    # the same as a 0% win rate.
    if filtered_trades.empty:
        return 0.0

    # Compute the new statistics
    new_stats = compute_stats(stats=stats, data=data, trades=filtered_trades)

    # Return the negated win rate as the objective (fmin minimises)
    return -new_stats['Win Rate [%]']

# Define the hyperparameter space
space = hp.uniform('threshold', 0.001, 0.999)

In [None]:
# TPE search over the threshold space: 20 evaluations of the objective.
best = fmin(
    objective,
    space,
    algo=tpe.suggest,
    max_evals=20,
)

# Pull the winning threshold out of the result and show it as the cell output.
best_threshold = best['threshold']
best_threshold

In [None]:
# Preview the trades that survive the tuned threshold.
filter_trades_by_prediction(
    trades,
    stats._strategy.predictions,
    threshold=best_threshold,
).head()

In [None]:
# Recompute the backtest statistics using only the trades kept by the tuned threshold.
best_stats = compute_stats(
    stats=stats,
    data=data,
    trades=filter_trades_by_prediction(
        trades,
        stats._strategy.predictions,
        threshold=best_threshold,
    ),
)
best_stats

In [None]:
# First rows of the trade table stored in the recomputed statistics.
best_stats['_trades'].head()

In [None]:
# Running sum of the PnL column for the threshold-filtered trades, drawn as a line.
plot(best_stats['_trades']['PnL'].cumsum());

### ------------------------------------------------------------------------------------------------------------

In [None]:
# Evaluate on unseen data: same setup as the optimisation run, but in 'test' mode.
consts = {'mode': ['test'], 'clf_class': [LogisticRegression], 'period': [data_timeperiod]}
bt = Backtest(data, MLEnsembleStrategy, **btkw)
# min_confidence uses the built-in confidence logic in the strategy, so the
# best filter is applied in real-time rather than after the fact.
test_stats = bt.run(
    **{name: options[0] for name, options in consts.items()},
    num_clfs=50,
    min_confidence=best_threshold,
)
test_trades = test_stats['_trades']
test_stats

In [None]:
# First rows of the trade table from the test-mode run.
test_trades.head()

In [None]:
# Running sum of the PnL column for the test-mode trades, drawn as a line.
plot(test_stats['_trades']['PnL'].cumsum());

### ------------------------------------------------------------------------------------------------------------