In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
# Loading the model
import tensorflow.keras
plt.style.use("seaborn")
import pickle
pd.set_option('display.float_format', lambda x: '%.5f' % x)
import seaborn as sns

In [20]:
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.optimizers import Adam
from keras.layers import LSTM

In [176]:
def set_seeds(seed = 100):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
def cw(df):
    c0, c1 = np.bincount(df["dir"])
    w0 = (1/c0) * (len(df)) / 2
    w1 = (1/c1) * (len(df)) / 2
    return {0:w0, 1:w1}

optimizer = Adam(lr = 0.0001)

def create_model(hl = 2, hu = 100, dropout = False, rate = 0.3, regularize = False,
                 reg = l1(0.0005), optimizer = optimizer, input_dim = 8):
#input_dim
    model = Sequential()
    model.add(LSTM(units=50,return_sequences=True,input_shape=(8,1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam',loss='mean_squared_error')
    print(model.summary())
    return model

  super(Adam, self).__init__(name, **kwargs)


In [177]:
class ML_Backtester():
    ''' Class for the vectorized backtesting of (levered) Futures MAchine Learning powered trading strategies .
    
    Attributes
    ============
    filepath: str
        local filepath of the dataset (csv-file)
    symbol: str
        ticker symbol (instrument) to be backtested
    start: str
        start date for data import
    end: str
        end date for data import
    tc: float
        proportional trading costs per trade
    granulity: str
        granulity 5m, 15m, 30m, 1h, 4h
    
    
    Methods
    =======
    get_data:
        imports the data.
        
    test_strategy:
        prepares the data and backtests the trading strategy incl. reporting (wrapper).
        
    prepare_data:
        prepares the data for backtesting.
    
    run_backtest:
        runs the strategy backtest.
        
    plot_results:
        plots the cumulative performance of the trading strategy compared to buy-and-hold.
        
    optimize_strategy:
        backtests strategy for different parameter values incl. optimization and reporting (wrapper).
    
    find_best_strategy:
        finds the optimal strategy (global maximum).
        
    add_sessions:
        adds/labels trading sessions and their compound returns.
        
    add_leverage:
        adds leverage to the strategy.
        
    print_performance:
        calculates and prints various performance metrics.
        
    '''    
    
    def __init__(self, filepath, symbol, tc, granulity, window, lags, hini, hfin):
        
        self.filepath = filepath
        self.symbol = symbol
        self.tc = tc
        self.results = None
        self.get_data()
        self.tp_year = (self.data.Close.count() / ((self.data.index[-1] - self.data.index[0]).days / 365.25))
        self.granulity=granulity
        self.window=window
        self.lags=lags
        self.hini=hini
        self.hfin=hfin
        
    def __repr__(self):
        return "Futures_Backtester(window= {}, lag = {})".format(self.window, self.lags)
        
    def get_data(self):
        ''' Imports the data.
        '''
        try:
            data = pd.read_csv("../Data/{}/consolidado{}.csv".format(granulity, granulity))
        except IOError:
            
            path = r"../Data/{}".format(granulity) # use your path
            all_files = glob.glob(path + "/*.csv")

            out_file = "../Data/{}/consolidado{}.csv".format(granulity, granulity) 
            first_file = True # needed to write the header only for first file 
            for fp in all_files: 
                df = pd.read_csv(fp) 
                df = df.dropna() # remove records with blanks 
                if first_file:  
                    df.to_csv(out_file, index=False) 
                    first_file = False 
                else: 
                    df.to_csv(out_file, index=False, header=False, mode='a') 

            data = pd.read_csv("../Data/{}/consolidado{}.csv".format(granulity, granulity))
        finally:  
            data.columns = ["Open Time", "Open", "High", "Low", "Close",
                  "Volume", "Clos Time", "Quote Asset Volume", 
                  "Number of Trades", "Taker Buy Base Asset Volume",
                  "Taker Buy Quote Asset Volume", "Ignore" ]

            data["Open Time"] = pd.to_numeric(data["Open Time"], errors = "coerce")
            data["Date"] = pd.to_datetime(data.iloc[:,0], unit = "ms")
            data=data.dropna()
            data.set_index("Date", inplace = True)
            data=data.sort_values("Date", ascending=True)
            symbol = 'Close'
            data["returns"] = np.log(data['Close'] / data['Close'].shift())
           
            self.data = data

    
    def test_strategy(self):
        '''
        Prepares the data and backtests the trading strategy incl. reporting (Wrapper).
         
        Parameters
        ============
        '''
        self.prepare_data()
        self.run_backtest()
        
        data = self.results.copy()
        data["creturns"] = data["returns"].cumsum().apply(np.exp)
        data["cstrategy"] = data["strategy"].cumsum().apply(np.exp)
        self.results = data
        self.print_performance()
    
    def prepare_data(self):
        ''' Prepares the Data for Backtesting.
        '''
       # window = 50 now defined as init class attribute
       # lags = 10 now defined as init class attribute
        cols = []
        features = ["dir", "sma", "boll", "min", "max", "mom", "vol"]
        
        df = self.data.copy()
        df["dir"] = np.where(df["returns"] > 0, 1, 0)
        df["sma"] = df[symbol].rolling(window).mean() - df[symbol].rolling(150).mean()
        df["boll"] = (df[symbol] - df[symbol].rolling(window).mean()) / df[symbol].rolling(window).std()
        df["min"] = df[symbol].rolling(window).min() / df[symbol] - 1
        df["max"] = df[symbol].rolling(window).max() / df[symbol] - 1
        df["mom"] = df["returns"].rolling(3).mean()
        df["vol"] = df["Volume"]
        df.dropna(inplace = True)
        
        cols = []
        features = ["Close", "sma", "boll", "min", "max", "mom", "vol", "dir"]
        cols=features
        df.dropna(inplace = True)
        ########################## Strategy-Specific #############################
        split = int(len(df)*0.7)
        train = df.iloc[:split].copy()
        test = df.iloc[split:].copy()
        mu, std = train.mean(), train.std() # train set parameters (mu, std) for standardization
        train_s = (train - mu) / std 
        set_seeds(100)
        try:
            model = tensorflow.keras.models.load_model("./Models/{}/DNN_BTCUSDT_{}_W{}_L{}".format(granulity, granulity, window, lags))
            # Loading mu and std
            params = pickle.load(open("./Models/{}/DNN_BTCUSDT_{}_W{}_L{}/params.pkl".format(granulity, granulity, window, lags), "rb"))
            mu = params["mu"]
            std = params["std"]
        except IOError:

            model = create_model(hl = 3, hu = 50, dropout = True, input_dim = len(cols))
            model.fit(x = train_s[cols], y = train["dir"], epochs = 5, verbose = 0, validation_split = 0.2, shuffle = False, class_weight = cw(train))
            #pred = model.predict(train_s[cols])
            model.save("./Models/{}/DNN_BTCUSDT_{}_W{}_L{}".format(granulity, granulity, window, lags))
            params = {"mu":mu, "std":std}
            pickle.dump(params, open("./Models/{}/DNN_BTCUSDT_{}_W{}_L{}/params.pkl".format(granulity, granulity, window, lags), "wb"))
            
        ##########################################################################
        
        finally:
            test_s = (test - mu) / std # standardization of test set features (with train set parameters!!!)
            pred = model.predict(test_s[cols])
            test["proba"] = model.predict(test_s[cols])
            test["position"] = np.where(test.proba < 0.47, -1, np.nan) # 1. short where proba < 0.47
            test["position"] = np.where(test.proba > 0.53, 1, test.position) # 2. long where proba > 0.53
            test.index = test.index.tz_localize("UTC")
            test["NYTime"] = test.index.tz_convert("America/New_York")
            test["hour"] = test.NYTime.dt.hour
            test["position"] = np.where(~test.hour.between(self.hini, self.hfin), 0, test.position) # 3. neutral in non-busy hours
            test["position"] = test.position.ffill().fillna(0) # 4. in all other cases: hold position

        
        self.results = test
    
    def run_backtest(self):
        ''' Runs the strategy backtest.
        '''
        test=self.results.copy()
        test["strategy"] = test["position"] * test["returns"]
        test["creturns"] = test["returns"].cumsum().apply(np.exp)
        test["cstrategy"] = test["strategy"].cumsum().apply(np.exp)
        test["trades"] = test.position.diff().abs()
        test["strategy_net"] = test.strategy - test.trades * tc
        test["cstrategy_net"] = test["strategy_net"].cumsum().apply(np.exp)
        self.results=test
    
    def plot_results(self, leverage = False): #Adj!
        '''  Plots the cumulative performance of the trading strategy compared to buy-and-hold.
        '''
        if self.results is None:
            print("Run test_strategy() first.")
        elif leverage: # NEW!
            title = "{} | TC = {} | Leverage = {}".format(self.symbol, self.tc, self.leverage)
            self.results[["creturns", "cstrategy", "cstrategy_levered"]].plot(title=title, figsize=(12, 8))
        else:
            title = "{} | TC = {}".format(self.symbol, self.tc)
            self.results[["creturns", "cstrategy"]].plot(title=title, figsize=(12, 8))
            
            
    def optimize_strategy(self, SMA_S_range, SMA_M_range, SMA_L_range, metric = "Multiple"):
        '''
        Backtests strategy for different parameter values incl. Optimization and Reporting (Wrapper).
         
        Parameters
        ============
        SMA_S_range: tuple
            tuples of the form (start, end, step size).
        
        SMA_M_range: tuple
            tuples of the form (start, end, step size).
            
        SMA_L_range: tuple
            tuples of the form (start, end, step size).
        
        metric: str
            performance metric to be optimized (can be "Multiple" or "Sharpe")
        '''
        
        self.metric = metric
        
        if metric == "Multiple":
            performance_function = self.calculate_multiple
        elif metric == "Sharpe":
            performance_function = self.calculate_sharpe
        
        SMA_S_range = range(*SMA_S_range)
        SMA_M_range = range(*SMA_M_range)
        SMA_L_range = range(*SMA_L_range)
        
        combinations = list(product(SMA_S_range, SMA_M_range, SMA_L_range))
         
        performance = []
        for comb in combinations:
            self.prepare_data(smas = comb)
            self.run_backtest()
            performance.append(performance_function(self.results.strategy))
    
        self.results_overview =  pd.DataFrame(data = np.array(combinations), columns = ["SMA_S", "SMA_M", "SMA_L"])
        self.results_overview["performance"] = performance
        self.find_best_strategy()
        
        
    def find_best_strategy(self):
        ''' Finds the optimal strategy (global maximum).
        '''
        
        best = self.results_overview.nlargest(1, "performance")
        SMA_S = best.SMA_S.iloc[0]
        SMA_M = best.SMA_M.iloc[0]
        SMA_L = best.SMA_L.iloc[0]
        perf = best.performance.iloc[0]
        print("SMA_S: {} | SMA_M: {} | SMA_L : {} | {}: {}".format(SMA_S, SMA_M, SMA_L, self.metric, round(perf, 5)))  
        self.test_strategy(smas = (SMA_S, SMA_M, SMA_L))
        
    
    def add_sessions(self, visualize = False): # NEW!!!
        ''' 
        Adds/Labels Trading Sessions and their compound returns.
        
        Parameter
        ============
        visualize: bool, default False
            if True, visualize compound session returns over time
        '''
        
        if self.results is None:
            print("Run test_strategy() first.")
            
        data = self.results.copy()
        data["session"] = np.sign(data.trades).cumsum().shift().fillna(0)
        data["session_compound"] = data.groupby("session").strategy.cumsum().apply(np.exp) - 1
        self.results = data
        if visualize:
            data["session_compound"].plot(figsize = (12, 8))
            plt.show()  
        
    def add_leverage(self, leverage, report = True): # NEW!!!
        ''' 
        Adds Leverage to the Strategy.
        
        Parameter
        ============
        leverage: float (positive)
            degree of leverage.
        
        report: bool, default True
            if True, print Performance Report incl. Leverage.
        '''
        self.add_sessions()
        self.leverage = leverage
        
        data = self.results.copy()
        data["simple_ret"] = np.exp(data.strategy) - 1
        data["eff_lev"] = leverage * (1 + data.session_compound) / (1 + data.session_compound * leverage)
        data.eff_lev.fillna(leverage, inplace = True)
        data.loc[data.trades !=0, "eff_lev"] = leverage
        levered_returns = data.eff_lev.shift() * data.simple_ret
        levered_returns = np.where(levered_returns < -1, -1, levered_returns)
        data["strategy_levered"] = levered_returns
        data["cstrategy_levered"] = data.strategy_levered.add(1).cumprod()
        
        self.results = data
            
        if report:
            self.print_performance(leverage = True)
            
    ############################## Performance ######################################
    
    def print_performance(self, leverage = False): # Adj
        ''' Calculates and prints various Performance Metrics.
        '''
        
        data = self.results.copy()
        
        if leverage: # NEW!
            to_analyze = np.log(data.strategy_levered.add(1))
        else: 
            to_analyze = data.strategy
            
            
        strategy_multiple = round(self.calculate_multiple(to_analyze), 6)
        bh_multiple =       round(self.calculate_multiple(data.returns), 6)
        outperf =           round(strategy_multiple - bh_multiple, 6)
        cagr =              round(self.calculate_cagr(to_analyze), 6)
        ann_mean =          round(self.calculate_annualized_mean(to_analyze), 6)
        ann_std =           round(self.calculate_annualized_std(to_analyze), 6)
        sharpe =            round(self.calculate_sharpe(to_analyze), 6)
       
        print(100 * "=")
        print("BTC USDT ML STRATEGY - {}".format(granulity))
        print(100 * "-")
        print("PERFORMANCE MEASURES:")
        print("\n")
        print("Multiple (Strategy):         {}".format(strategy_multiple))
        print("Multiple (Buy-and-Hold):     {}".format(bh_multiple))
        print(38 * "-")
        print("Out-/Underperformance:       {}".format(outperf))
        print("\n")
        print("CAGR:                        {}".format(cagr))
        print("Annualized Mean:             {}".format(ann_mean))
        print("Annualized Std:              {}".format(ann_std))
        print("Sharpe Ratio:                {}".format(sharpe))
        
        print(100 * "=")
        
        
    def massive_strategy(self,granulity, l1, l2, w1, w2):
        for lag in range(l1, l2):
            for w in range(w1, w2):
                granulity=granulity
                filepath = "../Data/{}".format(granulity)
                symbol = "Close"
                tc = -0.0005
                window=w
                lags=lag
                hini=2
                hfin=12
                tester = ML_Backtester(filepath = filepath, symbol = symbol,
                                     tc = tc, granulity=granulity, window=window, lags=lags, hini=hini, hfin=hfin)
                tester.test_strategy()
    
    def calculate_multiple(self, series):
        return np.exp(series.sum())
    
    def calculate_cagr(self, series):
        return np.exp(series.sum())**(1/((series.index[-1] - series.index[0]).days / 365.25)) - 1
    
    def calculate_annualized_mean(self, series):
        return series.mean() * self.tp_year
    
    def calculate_annualized_std(self, series):
        return series.std() * np.sqrt(self.tp_year)
    
    def calculate_sharpe(self, series):
        if series.std() == 0:
            return np.nan
        else:
            return self.calculate_cagr(series) / self.calculate_annualized_std(series)

In [178]:
granulity="15m"
filepath = "../Data/{}".format(granulity)
symbol = "Close"
tc = -0.0005
window=50
lags=7
hini=2
hfin=12

In [179]:
tester = ML_Backtester(filepath = filepath, symbol = symbol,
tc = tc, granulity=granulity, window=window, lags=lags, hini=hini, hfin=hfin)

In [180]:
tester.test_strategy()

Model: "sequential_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_49 (LSTM)              (None, 8, 50)             10400     
                                                                 
 dropout_50 (Dropout)        (None, 8, 50)             0         
                                                                 
 lstm_50 (LSTM)              (None, 50)                20200     
                                                                 
 dropout_51 (Dropout)        (None, 50)                0         
                                                                 
 dense_28 (Dense)            (None, 1)                 51        
                                                                 
Total params: 30,651
Trainable params: 30,651
Non-trainable params: 0
_________________________________________________________________
None




INFO:tensorflow:Assets written to: ./Models/15m/DNN_BTCUSDT_15m_W50_L7/assets


INFO:tensorflow:Assets written to: ./Models/15m/DNN_BTCUSDT_15m_W50_L7/assets


BTC USDT ML STRATEGY - 15m
----------------------------------------------------------------------------------------------------
PERFORMANCE MEASURES:


Multiple (Strategy):         147646958544.31378
Multiple (Buy-and-Hold):     1.238548
--------------------------------------
Out-/Underperformance:       147646958543.07523


CAGR:                        1.9331609358729133e+17
Annualized Mean:             39.762665
Annualized Std:              0.413424
Sharpe Ratio:                4.675975567444626e+17
