## **Backtest models on real data**

This is a working notebook to backtest different models on real data.

**Note:** plotting with `cerebro.plot()` requires matplotlib version 3.2.2.

In [4]:
import sys
import os 
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf
import backtrader as bt
from datetime import datetime

local imports

In [20]:
# Make the parent directory and the current working directory importable so
# the local packages imported below resolve. Entries already on sys.path are
# skipped, so re-running this cell does not keep appending duplicates.
for _extra_path in (os.path.abspath('..'), os.getcwd()):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

from data_utils import *
from Strategies import *
from predictive_analytics.RNN.functions import transform_raw_data, last_step_accuracy, get_last_step_predictions_with_confidence
from predictive_analytics.transformer.models.basic_transformer import TransformerModel


Get trained models

In [8]:
# Locations of the saved models, one constant per model type.
# The paths are relative ("../"), so they resolve against the working directory.
TRANSFORMER_MODEL = "../predictive_analytics/transformer/models/transformer_checkpoint"
LINEAR_MODEL = "../predictive_analytics/transformer/models/basic_linear_model_2"
RNN_MODEL = "../predictive_analytics/RNN/models/beta_1"

Helper functions

In [47]:
def change_time_format(timestamp):
    """Return ``timestamp`` as a plain ``datetime`` with second resolution.

    The value is rendered with the pattern ``%Y-%m-%d %H:%M:%S`` and parsed
    back, so anything the pattern does not carry (sub-second part, time-zone
    information) is discarded. Needed so that backtrader logs are correct.

    Parameters
    ----------
    timestamp : object with a ``strftime`` method (e.g. ``datetime``)

    Returns
    -------
    datetime.datetime
        Naive datetime holding the same year/month/day/hour/minute/second.
    """
    pattern = "%Y-%m-%d %H:%M:%S"
    return datetime.strptime(timestamp.strftime(pattern), pattern)


def get_predictions_with_confidence(model, X):
    """Predict with ``model`` and pair each predicted label with its score.

    Used for models that only output a single target. The reduction is done
    with NumPy, so any object exposing ``predict`` and returning an
    array-like works; no tensor round trip is needed.

    Parameters
    ----------
    model : object with a ``predict(X)`` method
        ``predict`` must return an array-like whose last axis holds one score
        per class.
    X : model input, passed to ``model.predict`` unchanged.

    Returns
    -------
    numpy.ndarray of shape (n, 2)
        Column 0 is the index of the largest score along the last axis (the
        predicted label, stored as a float), column 1 is that largest score.
        The score is a probability only if the model emits probabilities.
    """
    y_pred = np.asarray(model.predict(X))
    pred_labels = np.argmax(y_pred, axis=-1)
    probabilities = np.max(y_pred, axis=-1)
    # Flatten both so predictions with extra leading axes still give two columns.
    return np.column_stack((pred_labels.reshape(-1), probabilities.reshape(-1)))

##### Load Model
Model loading and inference differ by model type. Each cell below assigns to `model`, so run only the cell for the model you want to backtest.

In [11]:
# linear model
# NOTE(review): mcc_metric is not imported by name in this notebook;
# presumably it comes from one of the star imports above — confirm.
linear_custom_objects = {'mcc_metric': mcc_metric}
model = tf.keras.models.load_model(LINEAR_MODEL, custom_objects=linear_custom_objects)

In [19]:
# RNN
# The saved model references the custom metric last_step_accuracy, so it is
# supplied to Keras when deserialising.
rnn_custom_objects = {'last_step_accuracy': last_step_accuracy}
model = tf.keras.models.load_model(RNN_MODEL, custom_objects=rnn_custom_objects)




In [48]:
from tensorflow_addons.layers import multihead_attention

# Transformer
# Custom layer, model class and metric that Keras needs in order to rebuild
# the saved transformer checkpoint.
custom_objects = dict(
    MultiHeadAttention=multihead_attention.MultiHeadAttention,
    TransformerModel=TransformerModel,
    mcc_metric=mcc_metric,
)

model = tf.keras.models.load_model(
    TRANSFORMER_MODEL,
    custom_objects=custom_objects,
)

Load Transformer

#### Get stock data

In [49]:
# Instrument and date window requested from yfinance
ticker = 'TSLA'
START_DATE, END_DATE = '2022-11-1', '2022-11-20'

# Bar size; used to build the "<n>m" interval string and the stats file path
INTERVAL = 5

# Window sizes used when trimming the frame to the rows that carry a label.
# Presumably the look-back and look-ahead lengths in bars — confirm against
# transform_raw_data.
EVAL_RANGE = 24
PREDICT_RANGE = 3

In [50]:
# for normalization
# Per-ticker, per-interval statistics loaded from disk (the file names suggest
# they were computed on the training split — confirm).
train_means = np.load('../data/transformed/{}min/{}_train_means.npy'.format(INTERVAL, ticker))
train_stds = np.load('../data/transformed/{}min/{}_train_stds.npy'.format(INTERVAL, ticker))

# get stock data
# Download intraday bars, then expose the datetime index as a 'Time' column.
tickerData = yf.Ticker(ticker)
df = tickerData.history(interval="{}m".format(INTERVAL), start=START_DATE, end=END_DATE)
df = df.reset_index().rename({'Datetime': 'Time'}, axis=1)

# transform
# The window sizes come from the configuration constants rather than repeated
# literals: the cell that later trims `df` to the labelled rows uses the same
# constants, so the features and the frame cannot drift apart.
data, targets, time_stamps = transform_raw_data(
    df, INTERVAL,
    EVAL_RANGE=EVAL_RANGE,
    PREDICT_RANGE=PREDICT_RANGE,
    NO_CHANGE_THRESHOLD=0.01,
    TRAIN_RATIO=None,
    train_means=train_means,
    train_stds=train_stds,
)

#### Get Predictions
Inference differs by model type; run only the cell that matches the model loaded above.

In [14]:
# linear
# Feed only the final entry along the second axis of `data`
# (assumes data is (samples, steps, features) — TODO confirm).
last_step_features = data[:, -1, :]
y_pred = get_predictions_with_confidence(model, last_step_features)



In [53]:
# RNN
# The project helper receives the full `data` array and the loaded model.
y_pred = get_last_step_predictions_with_confidence(
    model,
    data,
)



In [52]:
# Transformer
# The whole `data` array is passed to the model (no last-step slicing here).
y_pred = get_predictions_with_confidence(
    model,
    data,
)



In [78]:
# Inspect the predictions (NumPy abbreviates long arrays when displaying them).
# Presumably column 0 is the predicted class and column 1 its confidence —
# confirm against the prediction helper that produced y_pred.
y_pred

array([[1.        , 0.59651297],
       [2.        , 0.75608945],
       [0.        , 0.72969216],
       ...,
       [2.        , 0.84498638],
       [2.        , 0.82757694],
       [2.        , 0.89306724]])

#### Prepare for Backtrader analysis

In [53]:
# Create the data frame for backtrader: keep only the rows that have a
# prediction, use lower-case OHLCV column names and index by a naive datetime.
#
# The chain below builds a new frame instead of mutating a slice in place, so
# pandas has no SettingWithCopyWarning to raise and the global
# chained-assignment option does not need to be touched.
#
# NOTE: this cell replaces the raw `df`; re-run the download cell before
# running it a second time.
first_labelled = EVAL_RANGE - 1
last_labelled = df.shape[0] - PREDICT_RANGE - 1
df = (
    df.loc[first_labelled:last_labelled]  # label-based slice, both ends inclusive
      .drop(columns=['Dividends', 'Stock Splits'])
      # The downloaded frame names the closing price 'Close' (there is no
      # 'Adj Close' column), so that is the column to rename.
      .rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low',
                       'Close': 'close', 'Volume': 'volume'})
      .copy()
)

# One prediction row is expected per remaining bar; fail with a clear message otherwise.
if len(y_pred) != len(df):
    raise ValueError(
        'y_pred has {} rows but the trimmed frame has {}'.format(len(y_pred), len(df))
    )

df[['prediction', 'confidence']] = y_pred
df['Time'] = df['Time'].apply(change_time_format)  # naive, second-resolution datetimes
df = df.set_index('Time')

In [54]:
# Preview the first rows of the frame that is handed to backtrader below.
df.head()

Unnamed: 0_level_0,open,high,low,Close,volume,prediction,confidence
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-11-01 11:25:00,229.350006,230.0,229.0,229.880005,692651,1.0,0.596513
2022-11-01 11:30:00,229.869995,230.389999,229.210007,229.75,581693,2.0,0.756089
2022-11-01 11:35:00,229.738602,230.089996,228.309998,228.748993,684823,0.0,0.729692
2022-11-01 11:40:00,228.679993,229.019897,228.100006,228.199905,710022,2.0,0.674045
2022-11-01 11:45:00,228.169998,229.0,228.009995,228.598297,538755,2.0,0.551132


#### Perform Backtrader Analysis

In [77]:
STARTING_CASH = 10000.0
COMMISSION = 0.0

# Instantiate the SignalData feed from the prepared frame. It gets its own
# name so that `data` (the model input array used by the prediction cells) is
# not overwritten and those cells can still be re-run afterwards.
signal_feed = SignalData(dataname=df)

# instantiate Cerebro, add strategy, data, initial cash, commission
cerebro = bt.Cerebro(stdstats=True, cheat_on_open=True)
cerebro.addstrategy(MyStrategy)
cerebro.adddata(signal_feed, name=ticker)
cerebro.broker.setcash(STARTING_CASH)
cerebro.broker.setcommission(commission=COMMISSION)
# cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')

# run the backtest, reporting the portfolio value before and after
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
backtest_result = cerebro.run()
final_value = cerebro.broker.getvalue()
print('Final Portfolio Value: %.2f' % final_value)


Starting Portfolio Value: 10000.00
2022-11-02 09:40:00, BUY CREATED --- Size: 10, Cash: 10000.00, Open: 225.08999633789062, Close: 226.0500030517578
2022-11-02 09:40:00, BUY EXECUTED --- Price: 225.09, Cost: 2250.90,Commission: 0.00
2022-11-02 10:05:00, SELL CREATED --- Size: 10
2022-11-02 10:05:00, SELL EXECUTED --- Price: 227.30, Cost: 2250.90,Commission: 0.00
2022-11-02 10:05:00, OPERATION RESULT --- Gross: 22.10, Net: 22.10
2022-11-02 14:00:00, BUY CREATED --- Size: 10, Cash: 10022.10, Open: 221.88999938964844, Close: 226.55999755859375
2022-11-02 14:00:00, BUY EXECUTED --- Price: 221.89, Cost: 2218.90,Commission: 0.00
2022-11-02 14:05:00, SELL CREATED --- Size: 10
2022-11-02 14:05:00, SELL EXECUTED --- Price: 226.50, Cost: 2218.90,Commission: 0.00
2022-11-02 14:05:00, OPERATION RESULT --- Gross: 46.07, Net: 46.07
2022-11-02 14:25:00, BUY CREATED --- Size: 10, Cash: 10068.17, Open: 224.89999389648438, Close: 226.375
2022-11-02 14:25:00, BUY EXECUTED --- Price: 224.90, Cost: 2249.00

Compute Net Gain

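$$
    net\_gain = \frac{value_{final} - value_{start}}{value_{start}}
$$

where $value_{start}$ is the starting cash and $value_{final}$ is the portfolio value after the backtest.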

In [79]:
# Relative change of the portfolio value over the backtest
profit = final_value - STARTING_CASH
net_gain = profit / STARTING_CASH
print(f'Net Gain Percentage: {net_gain*100:.2f}%')

Net Gain Percentage: 14.14%


In [75]:
import matplotlib as mpl

# Raise the figure resolution to 150 dpi before asking cerebro to draw the
# backtest; iplot=False is handed to backtrader's plot call unchanged.
mpl.rcParams.update({'figure.dpi': 150})

cerebro.plot(iplot=False)

KeyboardInterrupt: 