### Library

In [1]:
import os
from dotenv import load_dotenv

from binance.client import Client

import pandas as pd
import numpy as np
from datetime import datetime
import ta

from sklearn.neighbors import KNeighborsClassifier

from backtesting import Backtest, Strategy

  return pd.read_csv(join(dirname(__file__), filename),
  return pd.read_csv(join(dirname(__file__), filename),


### TA Calculation Functions

#### SMA

In [2]:
def sma(df, window):
    """Return the simple moving average of `df` over `window` periods.

    `df` is wrapped in a pandas Series and handed to `ta`'s SMAIndicator.
    """
    prices = pd.Series(df)
    indicator = ta.trend.SMAIndicator(prices, window=window)
    return indicator.sma_indicator()

#### RSI

In [3]:
def rsi(df, window=14):
    """Return the relative strength index of `df` over `window` periods.

    `df` is wrapped in a pandas Series and handed to `ta`'s RSIIndicator.
    """
    prices = pd.Series(df)
    indicator = ta.momentum.RSIIndicator(prices, window=window)
    return indicator.rsi()

#### EMA

In [4]:
def ema(df, period=200):
    """Return the exponential moving average of `df` over `period` bars.

    `df` is wrapped in a pandas Series and handed to `ta`'s EMAIndicator.
    The look-back length is forwarded as the indicator's `window` argument.
    """
    # The original body referenced an undefined `window` name (NameError);
    # the look-back length is this function's `period` parameter.
    indicator = ta.trend.EMAIndicator(pd.Series(df), window=period)
    return indicator.ema_indicator()

#### MACD

In [5]:
def macd(df):
    """Return the MACD line of `df` using `ta`'s default MACD settings."""
    prices = pd.Series(df)
    indicator = ta.trend.MACD(prices)
    return indicator.macd()

#### Bollinger Bands

In [6]:
def signal_h(df):
    """Return the upper Bollinger band of `df` (default `ta` settings)."""
    bands = ta.volatility.BollingerBands(pd.Series(df))
    return bands.bollinger_hband()
def signal_l(df):
    """Return the lower Bollinger band of `df` (default `ta` settings)."""
    bands = ta.volatility.BollingerBands(pd.Series(df))
    return bands.bollinger_lband()

### Strategy Class

#### ML Strategy

In [7]:
class MLstr(Strategy):
    """Backtesting strategy that trades on the forecasts of a kNN classifier.

    The classifier is fitted once, in `init`, on the first `N_train` rows of
    the data. `next` then predicts a label for each later bar and opens a
    long (label 1) or short (label -1) position with bracketing take-profit
    and stop-loss prices.
    """
    # Relative distance of take-profit / stop-loss from the current close.
    price_delta = 0.004
    # Number of leading rows used as the in-sample training set.
    N_train = 10000

    def init(self):
        """Fit the classifier on the training slice and register indicators."""
        # Init our model, a kNN classifier
        self.clf = KNeighborsClassifier(7)

        # Train the classifier in advance on the first N examples
        df = self.data.df.iloc[:self.N_train]
        X, y = get_clean_Xy(df)
        self.clf.fit(X, y)

        # Plot y for inspection
        self.I(get_y, self.data.df, name='y_true')

        # Prepare empty, all-NaN forecast indicator
        self.forecasts = self.I(lambda: np.repeat(np.nan, len(self.data)), name='forecast')

    def next(self):
        """Forecast the current bar, place orders, and tighten old stops."""
        # Skip the training, in-sample data
        if len(self.data) < self.N_train:
            return

        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        # Forecast the next movement
        X = get_X(self.data.df.iloc[-1:])
        forecast = self.clf.predict(X)[0]

        # Update the plotted "forecast" indicator
        self.forecasts[-1] = forecast

        # If our forecast is upwards and we don't already hold a long position
        # place a long order for 20% of available account equity. Vice versa for short.
        # Also set target take-profit and stop-loss prices to be one price_delta
        # away from the current closing price.
        upper, lower = close[-1] * (1 + np.r_[1, -1] * self.price_delta)

        if forecast == 1 and not self.position.is_long:
            self.buy(size=.2, tp=upper, sl=lower)
        elif forecast == -1 and not self.position.is_short:
            self.sell(size=.2, tp=lower, sl=upper)

        # Additionally, set aggressive stop-loss on trades that have been open
        # for more than two days
        for trade in self.trades:
            if current_time - trade.entry_time > pd.Timedelta('2 days'):
                # `low` / `high` are whole price arrays; compare the scalar stop
                # against the latest bar only. Passing the full array to
                # max()/min() raises an ambiguous-truth-value ValueError.
                if trade.is_long:
                    trade.sl = max(trade.sl, low[-1])
                else:
                    trade.sl = min(trade.sl, high[-1])


### ML Model


In [8]:
def get_X(data):
    """Return the feature matrix: every column whose name contains 'X', as an array."""
    feature_frame = data.filter(like='X')
    return feature_frame.values

In [9]:
def get_y(data):
    """Label each row by the sign of the 48-bar-ahead return of 'Close'.

    Returns within +/-0.4% (inclusive) are labelled 0, larger gains 1 and
    larger losses -1. Rows without a 48-bar look-ahead stay NaN.
    """
    forward_return = data['Close'].pct_change(48).shift(-48)
    # Devalue returns smaller than 0.4%
    negligible = forward_return.between(-.004, .004)
    # np.sign maps what remains to 1 / -1 and leaves the zeros and NaNs as they are.
    return np.sign(forward_return.mask(negligible, 0))

In [11]:
def get_clean_Xy(data):
    """Return `(X, y)` arrays with the rows whose label is NaN removed.

    `X` comes from `get_X(data)` and `y` from `get_y(data)`; both are
    filtered with the same mask so they stay aligned row for row.
    """
    X = get_X(data)
    # `.values` (not `.value`, which does not exist on a Series) gives the
    # labels as a NumPy array so they can be masked alongside X.
    y = get_y(data).values
    has_label = ~np.isnan(y)
    return X[has_label], y[has_label]

In [None]:
def featureGeneration(data):
    """Return a copy of `data` extended with the model's `X_*` feature columns.

    Features are derived from the 'Close' column (SMA distances, SMA spreads,
    2-bar momentum, Bollinger-band distances and width) and from the index
    (day of week, hour), so the index must expose `dayofweek` and `hour`.
    Rows containing any NaN are dropped and every column is cast to float.

    The input frame is not modified.
    """
    # Work on a copy so the caller's frame is not mutated as a side effect.
    data = data.copy()
    close = data.Close

    sma10 = sma(close, 10)
    sma20 = sma(close, 20)
    sma50 = sma(close, 50)
    sma100 = sma(close, 100)
    upper = signal_h(close)
    lower = signal_l(close)

    # Design matrix / independent features:

    # Price-derived features
    data['X_SMA10'] = (close - sma10) / close
    data['X_SMA20'] = (close - sma20) / close
    data['X_SMA50'] = (close - sma50) / close
    data['X_SMA100'] = (close - sma100) / close

    data['X_DELTA_SMA10'] = (sma10 - sma20) / close
    data['X_DELTA_SMA20'] = (sma20 - sma50) / close
    data['X_DELTA_SMA50'] = (sma50 - sma100) / close

    # Indicator features
    data['X_MOM'] = close.pct_change(periods=2)
    data['X_BB_upper'] = (upper - close) / close
    data['X_BB_lower'] = (lower - close) / close
    data['X_BB_width'] = (upper - lower) / close

    # Some datetime features for good measure
    data['X_day'] = data.index.dayofweek
    data['X_hour'] = data.index.hour

    # Indicator warm-up leaves NaNs in the leading rows; drop them.
    return data.dropna().astype(float)

## Main

### Binance API

In [12]:
# Load variables from a local .env file into the process environment,
# then read the Binance credentials from it.
load_dotenv()
API_KEY = os.environ.get('API_KEY')
API_SECRET = os.environ.get('API_SECRET')

# Binance client built from those credentials; used by the data-fetching cells.
client = Client(API_KEY, API_SECRET)

### Parameters

### Fetch Historical Data

In [13]:
symbol = 'BTCUSDT'
interval = '1h'

# Millisecond epoch bounds. The datetimes are naive, so .timestamp()
# interprets them in the machine's local timezone.
start_time = int(datetime(2020,1,1,0,0).timestamp() * 1000)
end_time = int(datetime(2023,12,31,0,0).timestamp() * 1000)

kline = client.get_historical_klines(
    symbol=symbol,
    interval=interval,
    start_str=start_time,
    end_str=end_time,
)

columns = ['index','Open', 'High', 'Low', 'Close', 'Volume']

# Keep the first six kline fields, name them, and index by timestamp.
data = pd.DataFrame(kline).iloc[:, :6]
data.columns = columns
data['index'] = pd.to_datetime(data['index'], unit='ms')
data = data.set_index('index').astype(float)

In [None]:
def fetchData(symbol='BTCUSDT', interval='1h', *, start_time, end_time):
    """Download historical klines from Binance as a float OHLCV DataFrame.

    Parameters
    ----------
    symbol : str
        Trading pair passed to the Binance client.
    interval : str
        Kline interval passed to the Binance client.
    start_time, end_time : keyword-only, required
        Range bounds forwarded as `start_str` / `end_str`, e.g.
        ``int(datetime(2020, 1, 1, 0, 0).timestamp() * 1000)``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Open', 'High', 'Low', 'Close', 'Volume' as floats, indexed
        by a datetime index named 'index'. Empty if no klines are returned.

    Uses the module-level `client`.
    """
    # The original signature put required parameters after defaulted ones,
    # which is a SyntaxError; keyword-only keeps them required and in order.
    kline = client.get_historical_klines(
        symbol=symbol,
        interval=interval,
        start_str=start_time,
        end_str=end_time,
    )

    columns = ['index', 'Open', 'High', 'Low', 'Close', 'Volume']

    if not kline:
        # An empty response has no columns to rename; return an empty frame
        # with the same schema instead of failing on the column assignment.
        return pd.DataFrame(
            columns=columns[1:],
            index=pd.DatetimeIndex([], name='index'),
            dtype=float,
        )

    # Keep the first six kline fields, name them, and index by timestamp.
    data = pd.DataFrame(kline).iloc[:, :6]
    data.columns = columns
    data['index'] = pd.to_datetime(data['index'], unit='ms')
    return data.set_index('index').astype(float)

### Backtesting

#### Backtesting

#### Graph

## Temporary Ad Hoc Testing

In [15]:
# Display the current contents of `data` as the cell output.
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,X_SMA10,X_SMA20,X_SMA50,X_SMA100,X_DELTA_SMA10,X_DELTA_SMA20,X_DELTA_SMA50,X_MOM,X_BB_upper,X_BB_lower,X_BB_width,X_day,X_hour
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-04 19:00:00,7306.60,7353.84,7283.01,7334.83,1966.929401,0.001912,0.000839,0.012521,0.016604,-0.001073,0.011682,0.004083,-0.002334,0.004193,-0.005871,0.010065,5.0,19.0
2020-01-04 20:00:00,7334.89,7352.64,7310.67,7341.06,1433.864061,0.002449,0.001423,0.012359,0.017262,-0.001026,0.010936,0.004903,0.004672,0.003371,-0.006216,0.009587,5.0,20.0
2020-01-04 21:00:00,7340.90,7367.31,7330.72,7350.68,869.007514,0.003561,0.002447,0.012652,0.018300,-0.001114,0.010205,0.005648,0.002161,0.002284,-0.007178,0.009462,5.0,21.0
2020-01-04 22:00:00,7350.72,7359.70,7342.67,7350.24,474.926109,0.002993,0.002235,0.011500,0.017994,-0.000758,0.009265,0.006494,0.001251,0.002597,-0.007067,0.009664,5.0,22.0
2020-01-04 23:00:00,7350.05,7363.00,7328.90,7354.11,528.793108,0.002834,0.002617,0.010996,0.018265,-0.000217,0.008379,0.007269,0.000467,0.002359,-0.007593,0.009952,5.0,23.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-30 12:00:00,41893.96,42104.70,41882.71,41990.30,1042.660920,0.002097,0.001010,-0.008181,-0.013734,-0.001086,-0.009191,-0.005553,0.004383,0.006152,-0.008173,0.014325,5.0,12.0
2023-12-30 13:00:00,41990.31,42050.00,41886.43,42029.35,865.659130,0.002889,0.001673,-0.006753,-0.012701,-0.001216,-0.008427,-0.005948,0.003232,0.005356,-0.008702,0.014058,5.0,13.0
2023-12-30 14:00:00,42029.35,42300.00,42029.35,42244.12,1348.107560,0.007357,0.006524,-0.001378,-0.007458,-0.000833,-0.007903,-0.006080,0.006045,0.001012,-0.014061,0.015072,5.0,14.0
2023-12-30 15:00:00,42244.11,42430.00,42183.09,42374.96,1295.173300,0.009783,0.009287,0.001911,-0.004274,-0.000496,-0.007376,-0.006185,0.008223,-0.000800,-0.017774,0.016974,5.0,15.0


In [None]:
# Backtest the MLstr strategy on `data`, then render the interactive plot.
bt = Backtest(
    data,
    MLstr,
    cash=1000000,
    commission=.0007,
    margin=.05,
)
bt.run()
bt.plot()

In [36]:
# Scratch check of the [1, -1] vector that MLstr.next multiplies by price_delta
# to get the upper and lower price bounds.
np.r_[1, -1]

array([ 1, -1])

## Reference

Backtesting User Manual

https://kernc.github.io/backtesting.py/doc/examples/Quick%20Start%20User%20Guide.html