### Library

In [21]:
import os
from dotenv import load_dotenv

from binance.client import Client

import pandas as pd
import numpy as np
from datetime import datetime
import ta

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

from backtesting import Backtest, Strategy
from backtesting.test import SMA

### TA Calculation Functions

#### SMA

In [22]:
def sma(df, window):
    sma = ta.trend.SMAIndicator(pd.Series(df), window=window).sma_indicator()
    return sma

#### RSI

In [23]:
def rsi(df, window=14):
    rsi = ta.momentum.RSIIndicator(pd.Series(df), window=window).rsi()
    return rsi

#### EMA

In [24]:
def ema(df, period=200):
    ema = ta.trend.EMAIndicator(pd.Series(df), window=window).ema_indicator()
    return ema

#### MACD

In [25]:
def macd(df):
    macd = ta.trend.MACD(pd.Series(df)).macd()
    return macd

#### Bollinger Bands

In [26]:
def signal_h(df):
    return ta.volatility.BollingerBands(pd.Series(df)).bollinger_hband()
def signal_l(df):
    return ta.volatility.BollingerBands(pd.Series(df)).bollinger_lband()

### Strategy Class

#### ML Strategy

In [27]:
class MLstr(Strategy):
    price_delta = 0.004
    N_train = 10000
    def init(self):        
        # Init our model, a kNN classifier
        self.clf = KNeighborsClassifier(7)

        # Train the classifier in advance on the first N examples
        df = self.data.df.iloc[:self.N_train]
        X, y = get_clean_Xy(df)
        self.clf.fit(X, y)

        # Plot y for inspection
        self.I(get_y, self.data.df, name='y_true')

        # Prepare empty, all-NaN forecast indicator
        self.forecasts = self.I(lambda: np.repeat(np.nan, len(self.data)), name='forecast')

    def next(self):
        # Skip the training, in-sample data
        if len(self.data) < self.N_train:
            return

        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        # Forecast the next movement
        X = get_X(self.data.df.iloc[-1:])
        forecast = self.clf.predict(X)[0]

        # Update the plotted "forecast" indicator
        self.forecasts[-1] = forecast

        # If our forecast is upwards and we don't already hold a long position
        # place a long order for 20% of available account equity. Vice versa for short.
        # Also set target take-profit and stop-loss prices to be one price_delta
        # away from the current closing price.
        upper, lower = close[-1] * (1 + np.r_[1, -1] * self.price_delta)
 
        if forecast == 1 and not self.position.is_long:
            self.buy(size=.2, tp=upper, sl=lower)
        elif forecast == -1 and not self.position.is_short:
            self.sell(size=.2, tp=lower, sl=upper)

        # Additionally, set aggressive stop-loss on trades that have been open 
        # for more than two days
        for trade in self.trades:
            if current_time - trade.entry_time > pd.Timedelta('2 days'):
                if trade.is_long:
                    trade.sl = max(trade.sl, low)
                else:
                    trade.sl = min(trade.sl, high)


### ML Model


## Main

### Binance API

In [28]:
load_dotenv()
API_KEY    = os.getenv('API_KEY')
API_SECRET = os.getenv('API_SECRET')

client = Client(API_KEY, API_SECRET)

### Parameters

### Fetch Historical Data

In [29]:
symbol = 'BTCUSDT'
interval = '1h'

start_time = int(datetime(2020,1,1,0,0).timestamp() * 1000)
end_time    = int(datetime(2023,12,31,0,0).timestamp() * 1000)

kline = client.get_historical_klines(symbol=symbol, interval=interval, start_str=start_time, end_str=end_time)

columns = ['index','Open', 'High', 'Low', 'Close', 'Volume']

data = pd.DataFrame(kline)
data = data.iloc[:, :6]
data.columns  = columns
data['index'] = pd.to_datetime(data['index'], unit='ms')
data.set_index('index', inplace=True)
data = data.astype(float)

### Backtesting

#### Backtesting

#### Graph

## For Temp Ad Hoc Testing

In [30]:
def get_X(data):
    return data.filter(like='X').values

In [31]:
def get_y(data):
    y = data['Close'].pct_change(48).shift(-48)
    y[y.between(-.004, .004)] = 0             # Devalue returns smaller than 0.4%
    y[y > 0] = 1
    y[y < 0] = -1
    return y

In [32]:
def get_clean_Xy(data):
    X = get_X(data)
    y = get_y(data)s
    isnan = np.isnan(y)
    X = X[~isnan]
    y = y[~isnan]
    return X, y

In [47]:
# def featureGeneration(data):
close = data.Close.values

sma10 = sma(data.Close, 10).values
sma20 = sma(data.Close, 20).values
sma50 = sma(data.Close, 50).values
sma100 = sma(data.Close, 100).values
upper = signal_h(data.Close).values
lower = signal_l(data.Close).values

# Design matrix / independent features:

# Price-derived features
data['X_SMA10'] = (close - sma10) / close
data['X_SMA20'] = (close - sma20) / close
data['X_SMA50'] = (close - sma50) / close
data['X_SMA100'] = (close - sma100) / close

data['X_DELTA_SMA10'] = (sma10 - sma20) / close
data['X_DELTA_SMA20'] = (sma20 - sma50) / close
data['X_DELTA_SMA50'] = (sma50 - sma100) / close

# Indicator features
data['X_MOM'] = data.Close.pct_change(periods=2)
data['X_BB_upper'] = (upper - close) / close
data['X_BB_lower'] = (lower - close) / close
data['X_BB_width'] = (upper - lower) / close

# Some datetime features for good measure
data['X_day'] = data.index.dayofweek
data['X_hour'] = data.index.hour

data = data.dropna().astype(float)

In [45]:
# def featureGeneration(data):
close = data.Close

sma10 = sma(data.Close, 10)
sma20 = sma(data.Close, 20)
sma50 = sma(data.Close, 50)
sma100 = sma(data.Close, 100)
upper = signal_h(data.Close)
lower = signal_l(data.Close)

# Design matrix / independent features:

# Price-derived features
data['X_SMA10'] = (close - sma10) / close
data['X_SMA20'] = (close - sma20) / close
data['X_SMA50'] = (close - sma50) / close
data['X_SMA100'] = (close - sma100) / close

data['X_DELTA_SMA10'] = (sma10 - sma20) / close
data['X_DELTA_SMA20'] = (sma20 - sma50) / close
data['X_DELTA_SMA50'] = (sma50 - sma100) / close

# Indicator features
data['X_MOM'] = data.Close.pct_change(periods=2)
data['X_BB_upper'] = (upper - close) / close
data['X_BB_lower'] = (lower - close) / close
data['X_BB_width'] = (upper - lower) / close

# Some datetime features for good measure
data['X_day'] = data.index.dayofweek
data['X_hour'] = data.index.hour

data = data.dropna().astype(float)

In [48]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,X_SMA10,X_SMA20,X_SMA50,X_SMA100,X_DELTA_SMA10,X_DELTA_SMA20,X_DELTA_SMA50,X_MOM,X_BB_upper,X_BB_lower,X_BB_width,X_day,X_hour
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-21 07:00:00,8649.97,8657.51,8623.64,8634.70,1069.380106,-0.002381,-0.002327,-0.007182,-0.020191,0.000054,-0.004854,-0.013010,-0.002932,0.008025,-0.003370,0.011395,1.0,7.0
2020-01-21 08:00:00,8634.16,8670.00,8613.80,8658.59,1525.363374,0.000341,0.000002,-0.003445,-0.017196,-0.000339,-0.003448,-0.013750,0.001069,0.004222,-0.004226,0.008448,1.0,8.0
2020-01-21 09:00:00,8658.59,8666.78,8625.76,8630.21,1093.261576,-0.002806,-0.003423,-0.005693,-0.020238,-0.000617,-0.002270,-0.014546,-0.000520,0.007000,-0.000154,0.007154,1.0,9.0
2020-01-21 10:00:00,8630.08,8670.00,8602.80,8659.23,1754.110648,0.000385,-0.000052,-0.001297,-0.016584,-0.000438,-0.001244,-0.015287,0.000074,0.003617,-0.003512,0.007129,1.0,10.0
2020-01-21 11:00:00,8659.38,8677.12,8641.18,8656.76,1346.385777,0.000093,-0.000230,-0.000649,-0.016554,-0.000324,-0.000418,-0.015905,0.003076,0.003699,-0.003239,0.006938,1.0,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-30 12:00:00,41893.96,42104.70,41882.71,41990.30,1042.660920,0.002097,0.001010,-0.008181,-0.013734,-0.001086,-0.009191,-0.005553,0.004383,0.006152,-0.008173,0.014325,5.0,12.0
2023-12-30 13:00:00,41990.31,42050.00,41886.43,42029.35,865.659130,0.002889,0.001673,-0.006753,-0.012701,-0.001216,-0.008427,-0.005948,0.003232,0.005356,-0.008702,0.014058,5.0,13.0
2023-12-30 14:00:00,42029.35,42300.00,42029.35,42244.12,1348.107560,0.007357,0.006524,-0.001378,-0.007458,-0.000833,-0.007903,-0.006080,0.006045,0.001012,-0.014061,0.015072,5.0,14.0
2023-12-30 15:00:00,42244.11,42430.00,42183.09,42374.96,1295.173300,0.009783,0.009287,0.001911,-0.004274,-0.000496,-0.007376,-0.006185,0.008223,-0.000800,-0.017774,0.016974,5.0,15.0


In [None]:
bt = Backtest(data, MLstr, cash = 1000000, commission = .0007, margin = .05)
bt.run()
bt.plot()

In [36]:
np.r_[1, -1]

array([ 1, -1])

## Reference

Backtesting User Manual

https://kernc.github.io/backtesting.py/doc/examples/Quick%20Start%20User%20Guide.html