Changes from v2:

1. ATR is calculated inside strategy s.t. it can be used to calculate take profit and stop loss
2. implemented PCA to reduce dimension

## Import Library

In [1]:
from humpback import *

from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers, models

from backtesting import Backtest, Strategy

2024-08-13 01:51:35.663707: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import warnings
warnings.filterwarnings('ignore')

## Defining Functions

Functions in this section would be moved to humpback.py after testing for tidiness

https://keras.io/examples/timeseries/timeseries_traffic_forecasting/

## Defining Model

In [3]:
class LSTM_stack(layers.Layer):
    """stacks of LSTM with dropout, follwed by dense layer"""

    def __init__(
        self,
        dropout_rate: float = 0.2
        ):
        super().__init__()

        self.lstm_1 = layers.LSTM(
            units=50,
            return_sequences=True,
            )

        self.lstm_2 = layers.LSTM(
            units=50,
            return_sequences=True,
            )

        self.lstm_3 = layers.LSTM(
            units=50,
            return_sequences=False,
            )

        self.dense_1 = layers.Dense(
            units=1
            )

        self.dropout_1 = layers.Dropout(dropout_rate)

    def call(self, inputs):
        x = self.lstm_1(inputs)
        x = self.dropout_1(x)
        x = self.lstm_2(inputs)
        x = self.dropout_1(x)
        x = self.lstm_3(inputs)
        x = self.dropout_1(x)
        return self.dense_1(x)

In [4]:
class MyModel:
    def __init__(self):
        self.optimizer = 'adam'
        self.loss      = 'mean_squared_error'

    def trainModel(self, data_X, data_y, epochs: int = 20, batch_size: int = 32) -> None:
        self.trainScaler_X(data_X)
        self.trainScaler_y(data_y)

        self.trainPCA(data_X)
        eigen_X = self.pca.transform(self.scaler_X.transform(data_X))

        self.model = Sequential()
        self.model.add(LSTM(units=50,
                            return_sequences=True,
                            input_shape=(data_X.shape[1],1)))
        self.model.add(Dropout(0.2))

        self.model.add(LSTM(units=50,
                            return_sequences=True))
        self.model.add(Dropout(0.2))

        self.model.add(LSTM(units=50,
                            return_sequences=True))
        self.model.add(Dropout(0.2))

        self.model.add(LSTM(units=50))
        self.model.add(Dropout(0.2))

        self.model.add(Dense(units=1))

        self.model.compile(optimizer=self.optimizer,
                           loss=self.loss)

        self.model.fit(eigen_X,
                       self.scaler_y.transform(data_y),
                       epochs=epochs,
                       batch_size=batch_size)
    
    def trainScaler_X(self, data_X) -> None:
        self.scaler_X = MinMaxScaler()
        self.scaler_X.fit(data_X)

    def trainScaler_y(self, data_y) -> None:
        self.scaler_y = MinMaxScaler()
        self.scaler_y.fit(data_y)

    def predictModel(self, data_X):
        """output a predicted y given X

        Args:
            data_X (DataFrame): a pd.DataFrame of single row. Unscaled.

        Returns:
            unscaled prediction
        """
        scaled_X = self.scaler_X.transform(data_X)
        eigen_X = self.pca.transform(scaled_X)
        predict = self.model.predict(eigen_X)
        result = pd.DataFrame(self.scaler_y.inverse_transform(predict),
                              index=data_X.index)
        return result
    
    def trainPCA(self, data_X) -> None:
        self.pca = PCA(n_components=3)
        self.pca.fit(data_X)

    def get_model(self):
        return self.model

## Defining Strategy

In [13]:
class MyStr(Strategy):
    # Variables:
    # train_size = 0.7
    buy_threshold  =  .005
    sell_threshold = -.005

    # params
    N_train = 3500
    input_sequence_length = 20
    PCA_components = 6

    def init(self):
        # Declare indicators you will use in the strategy:
        self.getData(train_length=self.N_train, PCA_components=self.PCA_components)
        self.model_init(input_sequence_length=self.input_sequence_length, PCA_components=self.PCA_components)
        self.model_train()

    def next(self):
        if len(self.data) < self.N_train:
            return

        data_return = getReturn(self.data.df, 'Close')
        X = featureGeneration(data_return)
        X, _ = getXy(X, 'Return')
        X = self.pca.transform(self.X_scaler.transform(X.to_numpy()))

        X_dataset = keras.utils.timeseries_dataset_from_array(
            X[-(self.input_sequence_length+1):],
            None,
            sequence_length=self.input_sequence_length,
            shuffle=False)

        predictions = self.model.predict(X_dataset)
        
        atr_tp_sl = atr(self.data.df)

        if predictions[-1] > self.buy_threshold and not self.position.is_long:
            tp = self.data.Close[-1] + 2 * atr_tp_sl[-1]
            sl = self.data.Close[-1] - 2 * atr_tp_sl[-1]
            self.buy(size=.2, tp=tp, sl=sl)

        if predictions[-1] < self.sell_threshold and not self.position.is_short:
            tp = self.data.Close[-1] - 2 * atr_tp_sl[-1]
            sl = self.data.Close[-1] + 2 * atr_tp_sl[-1]
            self.sell(size=.2, tp=tp, sl=sl)

########################################

    def getData(self, train_length: int, PCA_components: int):
        data_return = getReturn(self.data.df, 'Close')
        data_feature = featureGeneration(data_return)
        X_data_df, y_data_df = getXy(data_feature.iloc[:train_length,], 'Return')

        train_size = 0.6
        val_size   = 0.4
        test_size  = 0
        
        X_train_array, X_val_array, _, self.X_scaler, self.pca = preprocess(X_data_df, train_size, val_size, test_size, apply_PCA=True, PCA_components=PCA_components)
        y_train_array, y_val_array, _, self.y_scaler = preprocess(y_data_df, train_size, val_size, test_size, apply_PCA=False)

        self.train_dataset = keras.utils.timeseries_dataset_from_array(
            X_train_array,
            y_train_array,
            sequence_length=self.input_sequence_length,
            shuffle=False)

        self.val_dataset = keras.utils.timeseries_dataset_from_array(
            X_val_array,
            y_val_array,
            sequence_length=self.input_sequence_length,
            shuffle=False) 

        # self.test_dataset = keras.utils.timeseries_dataset_from_array(
        #     X_test_array,
        #     None,
        #     sequence_length=input_sequence_length,
        #     shuffle=False) 

    def model_init(self, input_sequence_length: int, PCA_components: int):
        inputs  = keras.Input((
            input_sequence_length,
            PCA_components))
        outputs = LSTM_stack()(inputs)
        self.model   = keras.Model(inputs, outputs)

        optimizer = keras.optimizers.Adam()
        loss      = keras.losses.MeanSquaredError()

        self.model.compile(optimizer=optimizer,loss=loss)

    def model_train(self):
        self.model.fit(
            self.train_dataset,
            epochs=20,
            validation_data=self.val_dataset)



In [6]:
class WalkForwardStr(MyStr):
    def next(self):
        # Skip the cold start period with too few values available
        if len(self.data) < len(self.y_train):
            return
            
        if len(self.data) % 500:
            return super().next()

        # Retrain on last N_TRAIN values
        self.getData()
        self.model_init()

        # Now that the model is fitted, 
        # proceed the same as in MLTrainOnceStrategy
        super().next()

## Parameters

This section contains all the parameters.

In [7]:
# This chunk specifies the data

symbol = 'BTCUSDT'

## Obtain data

This section outputs training and testing data for the use of the following sections.

In [8]:
data_raw_df = pd.read_csv(f'Binance Data/{symbol}.csv', index_col='index')

## Backtesting

In [14]:
bt    = Backtest(data_raw_df, MyStr, cash = 1000000, commission=0.00075, margin=.05)
stats = bt.run()

Epoch 1/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 59ms/step - loss: 0.4941 - val_loss: 0.3866
Epoch 2/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.4716 - val_loss: 0.3829
Epoch 3/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.4693 - val_loss: 0.3793
Epoch 4/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.4582 - val_loss: 0.3783
Epoch 5/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.4603 - val_loss: 0.3763
Epoch 6/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 0.4491 - val_loss: 0.3756
Epoch 7/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 0.4545 - val_loss: 0.3749
Epoch 8/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.4512 - val_loss: 0.3749
Epoch 9/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━

TypeError: unsupported operand type(s) for -: 'str' and 'str'

## Results

In [None]:
# bt.plot()

In [None]:
# stats.tail()

In [None]:
# stats['_equity_curve']

## Ad hoc

In [None]:
# # This chunk specifies the data

# symbol = 'BTCUSDT'
# interval = '1h'

# start_str = int(datetime(2020,1,1,0,0).timestamp() * 1000)
# end_str    = int(datetime(2023,12,31,0,0).timestamp() * 1000)

In [None]:
# client = connectBinanceAPI()

In [None]:
# data_raw = getBinanceData(client=client,
                        #   symbol=symbol,
                        #   interval=interval,
                        #   start_str=start_str,
                        #   end_str=end_str)