# Strategy Optimization & Forward Testing

## Getting started

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use("seaborn-v0_8")

In [None]:
data = pd.read_csv("bitcoin.csv", parse_dates = ["Date"], index_col = "Date")
data

In [None]:
data.info()

In [None]:
data["returns"] = np.log(data.Close / data.Close.shift(1))
data

## Strategy Optimization (Part 1)

__Three Strategy Parameters:__

- Return Threshold: All Returns >= __90th__ Percentile labeled "Very High Return"
- Low and High Volume Change Threshold: All Volume Changes between __5th__ and __20th__ Percentile labeled "Moderate to High Decrease in Volume" 

__-> Strategy Parameters = (90, 5, 20)__

In [None]:
data

In [None]:
def backtest(data, parameters, tc):
    ''' Backtests the simple long-only price & volume strategy and returns its multiple.

    The strategy is long (position 1) by default and goes neutral (position 0)
    on every bar where the log return is at or above the return threshold AND
    the log volume change lies inside the volume band (both ends inclusive).

    Parameters
    ============
    data: DataFrame
        must contain the columns "Close", "Volume" and "returns" (log returns).
        The input is not modified; a copy is used internally.

    parameters: tuple (return_perc, vol_low_perc, vol_high_perc)
        percentiles used to derive the return threshold and the volume band.

    tc: float
        value added to the strategy log return for every position change.
        Pass a negative number to model trading costs (e.g. -0.00085).

    Returns
    ============
    float
        final value of the cumulative strategy performance (multiple of the
        initial investment).
    '''

    # prepare features
    data = data[["Close", "Volume", "returns"]].copy()
    data["vol_ch"] = np.log(data.Volume.div(data.Volume.shift(1)))
    # treat extreme volume changes (|log change| > 3) as outliers
    data.loc[data.vol_ch.abs() > 3, "vol_ch"] = np.nan

    # define trading positions
    return_thresh = np.percentile(data.returns.dropna(), parameters[0])
    vol_low, vol_high = np.percentile(data.vol_ch.dropna(), [parameters[1], parameters[2]])
    cond1 = data.returns >= return_thresh
    cond2 = data.vol_ch.between(vol_low, vol_high)

    data["position"] = 1
    data.loc[cond1 & cond2, "position"] = 0

    # backtest: the position decided on bar t earns the return of bar t+1
    data["strategy"] = data.position.shift(1) * data["returns"]
    data["trades"] = data.position.diff().fillna(0).abs()
    data.strategy = data.strategy + data.trades * tc
    data["cstrategy"] = data["strategy"].cumsum().apply(np.exp)

    # return strategy multiple
    # .iloc is explicitly positional: cstrategy[-1] relies on deprecated
    # positional fallback and fails outright on an integer index
    return data.cstrategy.iloc[-1]

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
backtest(data = data, parameters = (90, 5, 20), tc = -0.00085)

In [None]:
return_range = range(85, 98, 1) # potential values for return_thresh
vol_low_range = range(2, 16, 1) # potential values for vol_low
vol_high_range = range(16, 35, 1) # potential values for vol_high

In [None]:
list(return_range)

__Plan: Run Backtest for all combinations and find the best combination(s)__

In [None]:
from itertools import product

In [None]:
combinations = list(product(return_range, vol_low_range, vol_high_range))
combinations

In [None]:
len(combinations)

In [None]:
13 * 14 * 19

In [None]:
# one backtest per parameter combination, in the same order as `combinations`
results = [backtest(data = data, parameters = comb, tc = -0.00085) for comb in combinations]

In [None]:
# one row per parameter combination together with its strategy multiple
param_columns = ["returns", "vol_low", "vol_high"]
many_results = pd.DataFrame(data = combinations, columns = param_columns).assign(performance = results)

In [None]:
many_results

## Strategy Optimization (Part 2)

In [None]:
many_results

In [None]:
many_results.nlargest(20, "performance")

In [None]:
many_results.nsmallest(20, "performance")

In [None]:
many_results.groupby("returns").performance.mean().plot()
plt.show()

In [None]:
many_results.groupby("vol_low").performance.mean().plot()
plt.show()

In [None]:
many_results.groupby("vol_high").performance.mean().plot()
plt.show()

In [None]:
backtest(data = data, parameters = (94, 11, 27), tc = -0.00085)

In [None]:
backtest(data = data, parameters = (90, 5, 20), tc = -0.00085)

## Putting everything together: a Backtester Class

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import product
import warnings
warnings.filterwarnings("ignore")
plt.style.use("seaborn-v0_8")

__Why use OOP and create a class?__

- Organizing/Storing/Linking all Functionalities and the Code in one Place/Class (managing/reducing complexity)
- Reusability of Code
- Framework for many other Strategies (only a few adjustments required)

Note: You can find a __detailed Tutorial__ on OOP & Classes in the __Appendix__ (at the end of this course).

In [None]:
class Long_Only_Backtester():
    ''' Class for the vectorized backtesting of simple Long-only trading strategies.

    Attributes
    ============
    filepath: str
        local filepath of the dataset (csv-file)
    symbol: str
        ticker symbol (instrument) to be backtested
    start: str
        start date for data import
    end: str
        end date for data import
    tc: float
        proportional trading costs per trade. The value is ADDED to the strategy
        log return on every position change, so costs are passed as a negative
        number (e.g. -0.00085).


    Methods
    =======
    get_data:
        imports the data.

    test_strategy:
        prepares the data and backtests the trading strategy incl. reporting (wrapper).

    prepare_data:
        prepares the data for backtesting.

    run_backtest:
        runs the strategy backtest.

    plot_results:
        plots the cumulative performance of the trading strategy compared to buy-and-hold.

    optimize_strategy:
        backtests strategy for different parameter values incl. optimization and reporting (wrapper).

    find_best_strategy:
        finds the optimal strategy (global maximum).

    print_performance:
        calculates and prints various performance metrics.

    '''

    def __init__(self, filepath, symbol, start, end, tc):

        self.filepath = filepath
        self.symbol = symbol
        self.start = start
        self.end = end
        self.tc = tc
        self.results = None
        self.get_data()
        # average number of bars per year: bar count divided by the covered time span in years
        self.tp_year = (self.data.Close.count() / ((self.data.index[-1] - self.data.index[0]).days / 365.25))

    def __repr__(self):
        return "Long_Only_Backtester(symbol = {}, start = {}, end = {})".format(self.symbol, self.start, self.end)

    def get_data(self):
        ''' Imports the data.

        Reads the csv-file, restricts it to [start, end] and adds the log returns
        of the Close price as column "returns". The result is stored in self.data.
        '''
        raw = pd.read_csv(self.filepath, parse_dates = ["Date"], index_col = "Date")
        raw = raw.loc[self.start:self.end].copy()
        raw["returns"] = np.log(raw.Close / raw.Close.shift(1))
        self.data = raw

    def test_strategy(self, percentiles = None, thresh = None):
        '''
        Prepares the data and backtests the trading strategy incl. reporting (Wrapper).

        Parameters
        ============
        percentiles: tuple (return_perc, vol_low_perc, vol_high_perc)
            return and volume percentiles to be considered for the strategy.

        thresh: tuple (return_thresh, vol_low_thresh, vol_high_thresh)
            return and volume thresholds to be considered for the strategy.

        One of the two must be given; percentiles take precedence if both are passed.
        '''

        self.prepare_data(percentiles = percentiles, thresh = thresh)
        self.run_backtest()

        data = self.results.copy()
        data["creturns"] = data["returns"].cumsum().apply(np.exp)
        data["cstrategy"] = data["strategy"].cumsum().apply(np.exp)
        self.results = data

        self.print_performance()

    def prepare_data(self, percentiles, thresh):
        ''' Prepares the Data for Backtesting.

        Derives the return threshold and the volume band (either from percentiles
        of the loaded data or from explicit threshold values) and sets the position
        to 0 whenever return >= threshold and the volume change is inside the band;
        otherwise the position is 1.

        Raises
        ============
        ValueError
            if neither percentiles nor thresh is given. Without this check the
            thresholds of a previous run would be silently reused (or an
            AttributeError would be raised on a fresh instance).
        '''

        if percentiles is None and thresh is None:
            raise ValueError("Either percentiles or thresh must be provided.")

        data = self.data[["Close", "Volume", "returns"]].copy()
        data["vol_ch"] = np.log(data.Volume.div(data.Volume.shift(1)))
        # treat extreme volume changes (|log change| > 3) as outliers
        data.loc[data.vol_ch.abs() > 3, "vol_ch"] = np.nan

        if percentiles is not None:
            self.return_thresh = np.percentile(data.returns.dropna(), percentiles[0])
            self.volume_thresh = np.percentile(data.vol_ch.dropna(), [percentiles[1], percentiles[2]])
        else:
            self.return_thresh = thresh[0]
            self.volume_thresh = [thresh[1], thresh[2]]

        cond1 = data.returns >= self.return_thresh
        cond2 = data.vol_ch.between(self.volume_thresh[0], self.volume_thresh[1])

        data["position"] = 1
        data.loc[cond1 & cond2, "position"] = 0

        self.results = data

    def run_backtest(self):
        ''' Runs the strategy backtest.

        Adds the columns "strategy" (log returns net of trading costs) and
        "trades" (absolute position changes) to self.results.
        '''

        data = self.results.copy()
        # the position decided on bar t earns the return of bar t+1
        data["strategy"] = data["position"].shift(1) * data["returns"]
        data["trades"] = data.position.diff().fillna(0).abs()
        data.strategy = data.strategy + data.trades * self.tc

        self.results = data

    def plot_results(self):
        '''  Plots the cumulative performance of the trading strategy compared to buy-and-hold.
        '''
        # the cumulative columns only exist after test_strategy() has been run
        if self.results is None or "cstrategy" not in self.results.columns:
            print("Run test_strategy() first.")
        else:
            title = "{} | TC = {}".format(self.symbol, self.tc)
            self.results[["creturns", "cstrategy"]].plot(title=title, figsize=(12, 8))

    def optimize_strategy(self, return_range, vol_low_range, vol_high_range, metric = "Multiple"):
        '''
        Backtests strategy for different parameter values incl. Optimization and Reporting (Wrapper).

        Parameters
        ============
        return_range: tuple
            tuples of the form (start, end, step size).

        vol_low_range: tuple
            tuples of the form (start, end, step size).

        vol_high_range: tuple
            tuples of the form (start, end, step size).

        metric: str
            performance metric to be optimized (can be "Multiple" or "Sharpe")

        Raises
        ============
        ValueError
            if metric is unknown or the ranges produce no parameter combination.
        '''

        metric_functions = {"Multiple": self.calculate_multiple,
                            "Sharpe": self.calculate_sharpe}
        if metric not in metric_functions:
            raise ValueError('metric must be "Multiple" or "Sharpe", got {!r}'.format(metric))
        performance_function = metric_functions[metric]
        self.metric = metric

        return_range = range(*return_range)
        vol_low_range = range(*vol_low_range)
        vol_high_range = range(*vol_high_range)

        combinations = list(product(return_range, vol_low_range, vol_high_range))
        if not combinations:
            raise ValueError("The given ranges do not produce any parameter combination.")

        performance = []
        for comb in combinations:
            self.prepare_data(percentiles = comb, thresh = None)
            self.run_backtest()
            performance.append(performance_function(self.results.strategy))

        self.results_overview =  pd.DataFrame(data = np.array(combinations), columns = ["returns", "vol_low", "vol_high"])
        self.results_overview["performance"] = performance
        self.find_best_strategy()

    def find_best_strategy(self):
        ''' Finds the optimal strategy (global maximum).

        Picks the best row of self.results_overview, prints it and re-runs
        test_strategy() with these percentiles so that self.results reflects
        the best combination.
        '''

        best = self.results_overview.nlargest(1, "performance")
        return_perc = best.returns.iloc[0]
        vol_perc = [best.vol_low.iloc[0], best.vol_high.iloc[0]]
        perf = best.performance.iloc[0]
        print("Return_Perc: {} | Volume_Perc: {} | {}: {}".format(return_perc, vol_perc, self.metric, round(perf, 5)))
        self.test_strategy(percentiles = (return_perc, vol_perc[0], vol_perc[1]))

    ############################## Performance ######################################

    def print_performance(self):
        ''' Calculates and prints various Performance Metrics.
        '''

        data = self.results.copy()
        strategy_multiple = round(self.calculate_multiple(data.strategy), 6)
        bh_multiple =       round(self.calculate_multiple(data.returns), 6)
        outperf =           round(strategy_multiple - bh_multiple, 6)
        cagr =              round(self.calculate_cagr(data.strategy), 6)
        ann_mean =          round(self.calculate_annualized_mean(data.strategy), 6)
        ann_std =           round(self.calculate_annualized_std(data.strategy), 6)
        sharpe =            round(self.calculate_sharpe(data.strategy), 6)

        print(100 * "=")
        print("SIMPLE PRICE & VOLUME STRATEGY | INSTRUMENT = {} | THRESHOLDS = {}, {}".format(self.symbol, np.round(self.return_thresh, 5), np.round(self.volume_thresh, 5)))
        print(100 * "-")
        print("PERFORMANCE MEASURES:")
        print("\n")
        print("Multiple (Strategy):         {}".format(strategy_multiple))
        print("Multiple (Buy-and-Hold):     {}".format(bh_multiple))
        print(38 * "-")
        print("Out-/Underperformance:       {}".format(outperf))
        print("\n")
        print("CAGR:                        {}".format(cagr))
        print("Annualized Mean:             {}".format(ann_mean))
        print("Annualized Std:              {}".format(ann_std))
        print("Sharpe Ratio:                {}".format(sharpe))

        print(100 * "=")

    def calculate_multiple(self, series):
        ''' Returns the final multiple of a series of log returns.
        '''
        return np.exp(series.sum())

    def calculate_cagr(self, series):
        ''' Returns the compound annual growth rate of a series of log returns.

        The holding period is measured in calendar days between the first and
        the last index entry of the series.
        '''
        return np.exp(series.sum())**(1/((series.index[-1] - series.index[0]).days / 365.25)) - 1

    def calculate_annualized_mean(self, series):
        ''' Returns the mean log return scaled to one year (self.tp_year bars).
        '''
        return series.mean() * self.tp_year

    def calculate_annualized_std(self, series):
        ''' Returns the standard deviation of log returns scaled to one year.
        '''
        return series.std() * np.sqrt(self.tp_year)

    def calculate_sharpe(self, series):
        ''' Returns CAGR divided by annualized std (NaN if the series has no variation).

        Note: no risk-free rate is subtracted and CAGR (not the annualized mean)
        is used in the numerator.
        '''
        if series.std() == 0:
            return np.nan
        else:
            return self.calculate_cagr(series) / self.calculate_annualized_std(series)


In [None]:
filepath = "bitcoin.csv"
symbol = "BTCUSDT"
start = "2017-08-17"
end = "2021-10-07"
tc = -0.00085

In [None]:
tester = Long_Only_Backtester(filepath = filepath, symbol = symbol,
                              start = start, end = end, tc = tc)

In [None]:
tester

In [None]:
tester.data

In [None]:
tester.test_strategy(percentiles = (90, 5, 20))

In [None]:
tester.plot_results()

In [None]:
tester.results

In [None]:
return_thresh = tester.return_thresh
return_thresh

In [None]:
volume_thresh = tester.volume_thresh
volume_thresh

In [None]:
tester.test_strategy(thresh = (return_thresh, volume_thresh[0], volume_thresh[1]))

In [None]:
tester.optimize_strategy(return_range = (85, 98, 1), 
                         vol_low_range = (2, 16, 1), 
                         vol_high_range = (16, 35, 1))

In [None]:
tester.results_overview.nlargest(20, "performance")

In [None]:
tester.results

In [None]:
tester.plot_results()

## Backtesting & Forward Testing (Part 1)

__Great Backtesting Results - Too good to be true?__

Two major Problems: 

- __Data Snooping / "Over-Optimization"__ -> Will these parameters work with new/fresh data as well? <br>
- __Look-Ahead-Bias__ -> we know all future price and volume data from day 1 to calculate percentiles/thresholds. 

__Will this strategy outperform Buy-and-Hold in the Future?__

- wait months/year(s) and analyze then (not an option)
- split past data into __Backtesting Set__ (optimize Strategy) and __Forward Testing Set__ (test optimized Strategy on fresh data)

__Backtesting & Optimization (until the end of 2020)__

In [None]:
filepath = "bitcoin.csv"
symbol = "BTCUSDT"
start = "2017-08-17"
end = "2020-12-31"
tc = -0.00085

In [None]:
tester = Long_Only_Backtester(filepath = filepath, symbol = symbol, start = start, end = end, tc = tc)
tester

In [None]:
tester.optimize_strategy((85, 98, 1), (2, 16, 1), (16, 35, 1))

In [None]:
many_results = tester.results_overview
many_results

In [None]:
many_results.groupby("returns").performance.mean().plot()
plt.show()

In [None]:
many_results.groupby("vol_low").performance.mean().plot()
plt.show()

In [None]:
many_results.groupby("vol_high").performance.mean().plot()
plt.show()

In [None]:
tester.test_strategy((94, 11, 28))

In [None]:
tester.plot_results()

In [None]:
return_thresh = tester.return_thresh
return_thresh

In [None]:
volume_thresh = tester.volume_thresh
volume_thresh

__Forward Testing (starting at 2021-01-01)__

We need: the thresholds from backtesting

In [None]:
return_thresh

In [None]:
volume_thresh

In [None]:
filepath = "bitcoin.csv"
symbol = "BTCUSDT"
start = "2021-01-01"
end = "2021-10-7"
tc = -0.00085

In [None]:
tester = Long_Only_Backtester(filepath = filepath, symbol = symbol, start = start, end = end, tc = tc)
tester

In [None]:
tester.test_strategy(thresh = (return_thresh, volume_thresh[0], volume_thresh[1]))

In [None]:
tester.plot_results()

In [None]:
tester.results.position.value_counts()

__Reasons for Performance Difference between Backtesting and Forward Testing:__

- Data Snooping / Over-Optimization (partly)
- Look-Ahead-Bias (partly)

In [None]:
tester.optimize_strategy((85, 98, 1), (5, 15, 1), (15, 35, 1))

In [None]:
tester.plot_results()

- Overall Regime Change (Patterns can change over time)
- Strategy not powerful enough