Changes from v2:

1. ATR is now calculated inside the strategy so that it can be used to set the take-profit and stop-loss levels.
2. Implemented PCA to reduce the dimensionality of the features.

## Import Library

In [None]:
from humpback import *

from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers, models

from backtesting import Backtest, Strategy

In [None]:
import warnings
# Ignore all warnings raised after this point (no category or module filter is given).
warnings.filterwarnings('ignore')

## Defining Functions

Functions in this section will be moved to humpback.py after testing, to keep the notebook tidy.

https://keras.io/examples/timeseries/timeseries_traffic_forecasting/

## Defining Model

In [None]:
class LSTM_stack(layers.Layer):
    """Three stacked LSTM layers with dropout, followed by a dense layer.

    The first two LSTMs return full sequences so the next LSTM can consume
    them; the third returns only its final output, which is passed to a
    single-unit dense layer.

    Args:
        dropout_rate: Rate of the dropout layer applied after every LSTM.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(
        self,
        dropout_rate: float = 0.2,
        **kwargs
        ):
        super().__init__(**kwargs)

        self.lstm_1 = layers.LSTM(
            units=50,
            return_sequences=True,
            )

        self.lstm_2 = layers.LSTM(
            units=50,
            return_sequences=True,
            )

        self.lstm_3 = layers.LSTM(
            units=50,
            return_sequences=False,
            )

        self.dense_1 = layers.Dense(
            units=1
            )

        # One dropout layer (it holds no weights) reused after each LSTM.
        self.dropout_1 = layers.Dropout(dropout_rate)

    def call(self, inputs):
        """Run ``inputs`` through the LSTM stack and the dense layer.

        Each LSTM consumes the (dropped-out) output of the previous one.
        Previously every LSTM was fed the raw ``inputs``, so the results of
        ``lstm_1`` and ``lstm_2`` were thrown away.
        """
        x = self.lstm_1(inputs)
        x = self.dropout_1(x)
        x = self.lstm_2(x)
        x = self.dropout_1(x)
        x = self.lstm_3(x)
        x = self.dropout_1(x)
        return self.dense_1(x)

## Defining Strategy

In [None]:
class MyStr(Strategy):
    """Strategy that trades on forecasts from an LSTM fitted once in ``init``.

    ``init`` builds the training data, builds and fits the model, then
    predicts over the whole data set and stores the result as the
    ``prediction`` indicator. ``next`` compares the latest prediction with
    the thresholds and places orders whose take-profit / stop-loss sit two
    ATRs away from the last close.

    NOTE(review): ``self.y_scaler`` is stored by ``getData`` but never applied
    to the model output here; if ``preprocess`` scales the target, the
    thresholds are compared against scaled values -- confirm this is intended.
    """

    # Thresholds the latest prediction must cross to open a long / short.
    buy_threshold  =  .005
    sell_threshold = -.005

    # params
    N_train = 12200
    input_sequence_length = 20
    PCA_components = 6

    def init(self):
        """Fit the model and precompute the ``prediction`` indicator."""
        window = self.input_sequence_length
        n_components = self.PCA_components

        self.getData(train_length=self.N_train, PCA_components=n_components)
        self.model_init(input_sequence_length=window, PCA_components=n_components)
        self.model_train()

        # Rebuild the features for every bar and push them through the
        # scaler and PCA objects that getData stored on the instance.
        features = featureGeneration(getReturn(self.data.df, 'Close'))
        feature_df, _ = getXy(features, 'Return')
        scaled = self.X_scaler.transform(feature_df.to_numpy())
        reduced = self.pca.transform(scaled)

        windows = keras.utils.timeseries_dataset_from_array(
            reduced,
            None,
            sequence_length=window,
            shuffle=False)
        forecast = self.model.predict(windows).flatten()

        # Register an all-NaN indicator, then fill its tail so the last
        # forecast lines up with the last bar.
        self.prediction = self.I(
            lambda: np.repeat(np.nan, len(self.data)),
            name='prediction')
        self.prediction[-len(forecast):] = forecast

    def next(self):
        """Open a long or short position when the latest prediction crosses a threshold."""
        if len(self.data) < self.N_train:
            return

        # NOTE(review): ATR is recomputed from the data frame on every bar.
        atr_tp_sl = atr(self.data.df)
        signal = self.prediction[-1]

        if signal > self.buy_threshold and not self.position.is_long:
            close, band = self.data.Close[-1], 2 * atr_tp_sl[-1]
            self.buy(size=.2, tp=close + band, sl=close - band)

        if signal < self.sell_threshold and not self.position.is_short:
            close, band = self.data.Close[-1], 2 * atr_tp_sl[-1]
            self.sell(size=.2, tp=close - band, sl=close + band)

    # ---------------------------- helpers ----------------------------

    def getData(self, train_length: int, PCA_components: int):
        """Build the train / validation datasets from the first ``train_length`` feature rows.

        Stores ``X_scaler``, ``pca`` and ``y_scaler`` (as returned by
        ``preprocess``) together with ``train_dataset`` and ``val_dataset``
        on the instance.
        """
        features = featureGeneration(getReturn(self.data.df, 'Close'))
        X_data_df, y_data_df = getXy(features.iloc[:train_length], 'Return')

        # (train, validation, test) fractions; no test split is produced here.
        split = (0.6, 0.4, 0)

        X_train, X_val, _, self.X_scaler, self.pca = preprocess(
            X_data_df, *split, apply_PCA=True, PCA_components=PCA_components)
        y_train, y_val, _, self.y_scaler = preprocess(
            y_data_df, *split, apply_PCA=False)

        def as_windows(X, y):
            # Same windowing for both splits; order is kept (no shuffling).
            return keras.utils.timeseries_dataset_from_array(
                X,
                y,
                sequence_length=self.input_sequence_length,
                shuffle=False)

        self.train_dataset = as_windows(X_train, y_train)
        self.val_dataset = as_windows(X_val, y_val)

    def model_init(self, input_sequence_length: int, PCA_components: int):
        """Build and compile the model for ``(input_sequence_length, PCA_components)`` inputs."""
        inputs = keras.Input((input_sequence_length, PCA_components))
        self.model = keras.Model(inputs, LSTM_stack()(inputs))

        self.model.compile(
            optimizer=keras.optimizers.Adam(),
            loss=keras.losses.MeanSquaredError())

    def model_train(self):
        """Fit ``self.model`` for 20 epochs, validating on ``self.val_dataset``."""
        self.model.fit(
            self.train_dataset,
            epochs=20,
            validation_data=self.val_dataset)



In [None]:
class WalkForwardStr(MyStr):
    """Variant of ``MyStr`` that periodically rebuilds and refits the model.

    Every ``retrain_every`` bars (once at least ``N_train`` bars are
    available) the datasets and model are rebuilt and the model is fitted
    again before the usual trading logic runs.

    NOTE(review): ``MyStr.getData`` slices the *first* ``train_length``
    feature rows and ``MyStr.init`` computes ``self.prediction`` only once,
    so refitting here does not yet change the signals that are traded.
    A true walk-forward needs ``getData`` to use the most recent rows and
    the predictions to be refreshed after each refit.
    """

    # Number of bars between two refits.
    retrain_every = 500

    def next(self):
        # Skip the cold start period with too few values available.
        # (The parent never defines `y_train`; `N_train` is the training
        # length it actually uses.)
        if len(self.data) < self.N_train:
            return

        # Between refits, behave exactly like the parent strategy.
        if len(self.data) % self.retrain_every:
            return super().next()

        # Rebuild datasets and model with the same arguments the parent's
        # init() passes, then fit -- model_init alone leaves it untrained.
        self.getData(
            train_length=self.N_train,
            PCA_components=self.PCA_components)
        self.model_init(
            input_sequence_length=self.input_sequence_length,
            PCA_components=self.PCA_components)
        self.model_train()

        # Now that the model is fitted, proceed the same as the parent.
        super().next()

## Parameters

This section contains all the parameters.

In [None]:
# This chunk specifies the data: the symbol is used below to build the
# CSV path under 'Binance Data/'.

symbol = 'BTCUSDT'

## Obtain data

This section loads the raw data that the following sections use for training and testing.

In [None]:
# Load the CSV for `symbol`, using its 'index' column as the row index,
# then convert that index to a DatetimeIndex.
data_raw_df = pd.read_csv(f'Binance Data/{symbol}.csv', index_col='index')
data_raw_df.index = pd.DatetimeIndex(pd.to_datetime(data_raw_df.index))

## Backtesting

In [None]:
# Run MyStr once over the loaded data with its default class-level parameters.
bt = Backtest(
    data_raw_df,
    MyStr,
    cash=1000000,
    commission=0.00075,
    margin=.05,
)
stats = bt.run()

## Results

In [22]:
# Plot the result of the most recent backtest run.
bt.plot()

In [None]:
# stats.tail()

In [None]:
# stats['_equity_curve']

## Ad hoc

In [21]:
%%time

# Search buy/sell thresholds for the highest final equity.
# NOTE(review): MyStr.init() fits the model, so each try presumably retrains
# it from scratch -- up to 200 times here. The logged run below was
# interrupted manually; consider training once outside the optimisation.
stats_skopt, heatmap, optimize_result = bt.optimize(
    buy_threshold =[ 0.001,  0.1],      # Note: For method="skopt", we
    sell_threshold=[-0.001, -0.1],      # only need interval end-points
    maximize='Equity Final [$]',
    method='skopt',
    max_tries=200,
    random_state=0,
    return_heatmap=True,
    return_optimization=True)

Epoch 1/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 56ms/step - loss: 0.5519 - val_loss: 0.4158
Epoch 2/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.4944 - val_loss: 0.3965
Epoch 3/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 0.4759 - val_loss: 0.3854
Epoch 4/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.4743 - val_loss: 0.3824
Epoch 5/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.4677 - val_loss: 0.3817
Epoch 6/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.4634 - val_loss: 0.3812
Epoch 7/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.4685 - val_loss: 0.3797
Epoch 8/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.4572 - val_loss: 0.3786
Epoch 9/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━

KeyboardInterrupt: 