<a href="https://colab.research.google.com/github/ahsank/StockML/blob/main/Backtest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Backtesting
=======================

It uses *backtesting.py* Python framework for [backtesting](https://www.investopedia.com/terms/b/backtesting.asp) trading strategies. See [Quickstart](https://github.com/kernc/backtesting.py/blob/master/doc/examples/Quick%20Start%20User%20Guide.ipynb)


## Data
DataFrame should ideally be indexed with a _datetime index_ (convert it with [`pd.to_datetime()`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.to_datetime.html));
otherwise a simple range index will do.

In [1]:
!pip install backtesting

Collecting backtesting
  Downloading Backtesting-0.3.3.tar.gz (175 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/175.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/175.5 kB[0m [31m1.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.5/175.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: backtesting
  Building wheel for backtesting (setup.py) ... [?25l[?25hdone
  Created wheel for backtesting: filename=Backtesting-0.3.3-py3-none-any.whl size=173916 sha256=513871c688a33284bc4ca1d743272ce9db3af5c1eebfffb06dcbe4ad0d0b5bdd
  Stored in directory: /root/.cache/pip/wheels/e2/30/7f/19cbe31987c6ebdb47f1f510343249066711609e3da2d57176
Successfully built backtesting
Installing collected packages: backtesting
Successfully installed backtesting-

In [4]:
!pip install yahoo_fin

Collecting yahoo_fin
  Downloading yahoo_fin-0.8.9.1-py3-none-any.whl (10 kB)
Collecting requests-html (from yahoo_fin)
  Downloading requests_html-0.10.0-py3-none-any.whl (13 kB)
Collecting feedparser (from yahoo_fin)
  Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/81.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting sgmllib3k (from feedparser->yahoo_fin)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyquery (from requests-html->yahoo_fin)
  Downloading pyquery-2.0.0-py3-none-any.whl (22 kB)
Collecting fake-useragent (from requests-html->yahoo_fin)
  Downloading fake_useragent-1.5.1-py3-none-any.whl (17 kB)
Collecting parse (from requests-html->yahoo_fin)
  Downloading parse-1.20.1-py2.py3-none-any.whl (20 kB)
Collecting bs4

In [5]:
from yahoo_fin import stock_info
tickers = ['ARKK', 'SPY', 'XLE', 'QQQ']
num_days = 1200 # 6 years
dfs = {}
for ticker in tickers:
  df = stock_info.get_data(ticker, start_date='2016-01-01')
  df.columns = map(str.title, df.columns)
  # df['Unadjusted'] = df.Close
  # df.Close = df.Adjclose
  # df.drop('Adjclose', axis=1, inplace=True)
  # df = df.tail(num_days)
  dfs[ticker] = df


In [6]:
from yahoo_fin import stock_info
tickers = ['ARKK', 'SPY', 'XLE', 'QQQ', 'NVDA', 'MSFT', 'PTON', 'ETSY', 'COIN']
dfs = {}
for ticker in tickers:
  df = stock_info.get_data(ticker, start_date='2016-01-01')
  df.columns = map(str.title, df.columns)
  df['Unadjusted'] = df.Close
  df.Close = df.Adjclose
  df.drop('Adjclose', axis=1, inplace=True)
  dfs[ticker] = df

In [None]:
# Use Adjclose, Doesn't work
# spy.Close = spy.Adjclose
# spy.drop('Adjclose', axis=1, inplace=True)

In [16]:
from backtesting import Strategy
from backtesting.lib import crossover
from backtesting import Backtest

In [7]:
import pandas as pd


def SMA(values, n):
    """
    Return simple moving average of `values`, at
    each step taking into account `n` previous values.
    """
    return pd.Series(values).rolling(n, min_periods=1).mean()

In [8]:
def ToSeries(values):
  return pd.Series(values)

In [13]:


class SmaCross(Strategy):
    # Define the two MA lags as *class variables*
    # for later optimization
    n1 = 5 # 10
    n2 = 200 # 20
    init = False

    def init(self):
        # Precompute the two moving averages
        self.sma1 = self.I(SMA, self.data.Close, self.n1)
        self.sma2 = self.I(SMA, self.data.Close, self.n2)

    def next(self):
        if self.init:
          self.buy()
          self.init = False
        # If sma1 crosses above sma2, close any existing
        # short trades, and buy the asset
        if crossover(self.sma1, self.sma2):
            # self.position.close()
            self.buy()

        # Else, if sma1 crosses below sma2, close any existing
        # long trades, and sell the asset
        elif crossover(self.sma2, self.sma1):
            self.position.close()
            # self.sell()



In [None]:
class SmaCross1(Strategy):
    # Define the two MA lags as *class variables*
    # for later optimization
    n1 = 5 # 10
    n2 = 200 # 20
    init = False
    lastClose = 0
    daydiff = 20

    def init(self):
        # Precompute the two moving averages
        self.sma1 = self.I(SMA, self.data.Close, self.n1)
        self.sma2 = self.I(SMA, self.data.Close, self.n2)

    def next(self):
        if self.init:
          self.buy()
          self.init = False
        # If sma1 crosses above sma2, close any existing
        # short trades, and buy the asset
        if crossover(self.sma1, self.sma2):
          # print(self.sma1.size, self.lastClose)
          if self.sma1.size > self.lastClose + self.daydiff:
            # self.position.close()
            self.buy()

        # Else, if sma1 crosses below sma2, close any existing
        # long trades, and sell the asset
        elif crossover(self.sma2, self.sma1):
            if self.position.is_long:
              self.lastClose = self.sma1.size
            self.position.close()
            # self.sell()

In [None]:
class AboveSma(Strategy):
    # Define the two MA lags as *class variables*
    # for later optimization
    n1 = 5
    n2 = 200
    n3 = 100 # Should be above 1% SMA

    def init(self):
        # Precompute the two moving averages
        self.sma1 = self.I(SMA, self.data.Close, self.n1)
        self.sma2 = self.I(SMA, self.data.Close, self.n2)
        self.Close = self.I(ToSeries, self.data.Close)

    def next(self):
        # If price crosses above sma1 and sma2, close any existing
        # short trades, and buy the asset
        if crossover(self.Close, self.sma1) and crossover(self.Close, self.sma2):
            # self.position.close()
            self.buy()

        # Else, if price crosses below sma1 and sma2, close any existing
        # long trades
        elif crossover(self.sma1, self.Close) and crossover(self.sma2, self.Close):
            self.position.close()
            # self.sell()

In [None]:
class CautiousSma(Strategy):
    # Define the two MA lags as *class variables*
    # for later optimization
    n1 = 5
    n2 = 200

    def init(self):
        # Precompute the two moving averages
        self.sma1 = self.I(SMA, self.data.Close, self.n1)
        self.sma2 = self.I(SMA, self.data.Close, self.n2)
        self.Close = self.I(ToSeries, self.data.Close)

    def next(self):
        # If price crosses above sma1 and sma2, close any existing
        # short trades, and buy the asset
        if crossover(self.Close, self.sma1) and crossover(self.Close, self.sma2):
            self.position.close()
            self.buy()
        # Else, if price crosses below sma1 or sma2, close any existing
        # long trades
        elif crossover(self.sma1, self.Close) or crossover(self.sma2, self.Close):
            self.position.close()
            # self.sell()

In [10]:
def HighRange(values, m, n=0):
    """
    Return High value of range (-m, -n) days
    """
    return pd.Series(values).shift(n).fillna(values[0]).rolling(m, min_periods=1).max()

In [11]:
def LowRange(values, m, n=0):
    """
    Return High value of range (-m, -n) days
    """
    return pd.Series(values).shift(n).fillna(values[0]).rolling(m, min_periods=1).min()

In [None]:
dfs['SPY']

Unnamed: 0,Open,High,Low,Close,Volume,Ticker,Unadjusted
2016-01-04,200.490005,201.029999,198.589996,174.043213,222353500,SPY,201.020004
2016-01-05,201.399994,201.899994,200.050003,174.337570,110845800,SPY,201.360001
2016-01-06,198.339996,200.059998,197.600006,172.138458,152112600,SPY,198.820007
2016-01-07,195.330002,197.440002,193.589996,168.008591,213436100,SPY,194.050003
2016-01-08,195.190002,195.850006,191.580002,166.164413,209817200,SPY,191.919998
...,...,...,...,...,...,...,...
2024-04-17,506.049988,506.220001,499.119995,500.549988,75910300,SPY,500.549988
2024-04-18,501.980011,504.130005,498.559998,499.519989,74548100,SPY,499.519989
2024-04-19,499.440002,500.459991,493.859985,495.160004,102129100,SPY,495.160004
2024-04-22,497.829987,502.380005,495.429993,499.720001,67961000,SPY,499.720001


In [None]:
HighRange(dfs['SPY']['Close'].values, 200, 200)

0       174.043213
1       174.043213
2       174.043213
3       174.043213
4       174.043213
           ...    
2085    438.575531
2086    439.080139
2087    439.080139
2088    439.080139
2089    439.080139
Length: 2090, dtype: float64

In [None]:
class AboveSmaAndLY(Strategy):
    # Define the two MA lags as *class variables*
    # for later optimization
    n1 = 5
    n2 = 200
    n3 = 200
    n4 = 100
    useyh = False
    daydiff = 20
    lastClose = 0

    def init(self):
        # Precompute the two moving averages
        self.sma1 = self.I(SMA, self.data.Close, self.n1)
        self.sma2 = self.I(SMA, self.data.Close, self.n2)
        self.Close = self.I(ToSeries, self.data.Close)
        self.YHigh = self.I(HighRange, self.data.Close, self.n3)
        self.LYHigh = self.I(HighRange, self.data.Close, self.n3, self.n3)
        self.YLow = self.I(LowRange, self.data.Close, self.n3)
        # self.LYLow = self.I(LowRange, self.data.Close, self.n3, self.n3)

    def next(self):
        close = self.Close[-1]
        closeadj = close*self.n4/100.0
        # If price crosses above sma1 and sma2, close any existing
        # short trades, and buy the asset
        if close > self.sma1[-1] and \
              close > self.sma2[-1] and \
              self.Close.size > self.lastClose + self.daydiff and \
              (close > (self.YLow[-1] + self.LYHigh[-1] + self.YHigh[-1])/3 or not self.useyh):
              # self.position.close()
              # print(self.Close[-1])
              self.buy()

        # Else, if price crosses below sma1 and sma2, close any existing
        # long trades
        elif (self.sma1[-1] > closeadj and \
            self.sma2[-1] > closeadj and \
            ((self.LYHigh[-1] + self.YHigh[-1])/2 > closeadj or not self.useyh)):
            # print(self.position.pl_pct)
            if self.position.is_long:
              self.lastClose = self.Close.size
            self.position.close()
            # self.sell()

In [50]:
class AboveSmaAndLY1(Strategy):
    # Define the two MA lags as *class variables*
    # for later optimization
    n1 = 5
    n2 = 200
    n3 = 200
    n4 = 100
    useyh = False
    daydiff = 20
    lastClose = 0
    initbuy = False

    def init(self):
        # Precompute the two moving averages
        self.sma1 = self.I(SMA, self.data.Close, self.n1)
        self.sma2 = self.I(SMA, self.data.Close, self.n2)
        self.YHigh = self.I(HighRange, self.data.Close, self.n3)
        self.YLow = self.I(LowRange, self.data.Close, self.n3)
        self.LYHigh = self.I(HighRange, self.data.Close, self.n3, self.n3)

    def next(self):
        price = self.sma1[-1]
        if not self.initbuy:
          self.buy()
          self.initbuy = True
        # If price crosses above sma1 and sma2, close any existing
        # short trades, and buy the asset
        if price > self.sma2[-1] and \
              self.sma1.size > self.lastClose + self.daydiff and \
              (price > (self.YLow[-1] + self.LYHigh[-1] + self.YHigh[-1])/3 or not self.useyh):
              # self.position.close()
              # print(self.Close[-1])
              self.buy()

        # Else, if price crosses below sma1 and sma2, close any existing
        # long trades
        elif (self.sma2[-1] > price and \
            ((self.LYHigh[-1] + self.YHigh[-1])/2 > price or not self.useyh)):
            # print(self.position.pl_pct)
            if self.position.is_long:
              self.lastClose = self.sma1.size
            self.position.close()
            # self.sell()

In [51]:
strategy = AboveSmaAndLY1
bt = Backtest(dfs['SPY'], strategy, cash=10_000, commission=0)
stats = bt.run()
stats

Start                     2016-01-04 00:00:00
End                       2024-04-24 00:00:00
Duration                   3033 days 00:00:00
Exposure Time [%]                   79.244381
Equity Final [$]                 20621.613098
Equity Peak [$]                  21376.012512
Return [%]                         106.216131
Buy & Hold Return [%]              189.761344
Return (Ann.) [%]                    9.114147
Volatility (Ann.) [%]               18.344747
Sharpe Ratio                         0.496826
Sortino Ratio                        0.763014
Calmar Ratio                         0.487345
Max. Drawdown [%]                  -18.701625
Avg. Drawdown [%]                   -1.601158
Max. Drawdown Duration      814 days 00:00:00
Avg. Drawdown Duration       25 days 00:00:00
# Trades                                    9
Win Rate [%]                        66.666667
Best Trade [%]                      43.856891
Worst Trade [%]                     -6.983072
Avg. Trade [%]                    

## Backtesting

 See
[`Backtest`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest)


In [52]:
from backtesting import Backtest

cols = ['Start', 'End', 'Duration', 'Exposure Time [%]', 'Equity Final [$]',
       'Equity Peak [$]', 'Return [%]', 'Buy & Hold Return [%]',
       'Return (Ann.) [%]', 'Volatility (Ann.) [%]', 'Sharpe Ratio',
       'Sortino Ratio', 'Calmar Ratio', 'Max. Drawdown [%]',
       'Avg. Drawdown [%]', 'Max. Drawdown Duration', 'Avg. Drawdown Duration',
       '# Trades', 'Win Rate [%]', 'Best Trade [%]', 'Worst Trade [%]',
       'Avg. Trade [%]', 'Max. Trade Duration', 'Avg. Trade Duration',
       'Profit Factor', 'Expectancy [%]', 'SQN', ]
starr = {}
bts = {}
for ticker in dfs.keys():
  tmpbt = Backtest(dfs[ticker], strategy, cash=10_000, commission=0)
  tmpstats = tmpbt.run()
  starr[ticker] = tmpstats
  bts[ticker] = tmpbt




In [53]:
statsdf = pd.DataFrame(starr).T[cols]
statsdf.mean()

Start                     2017-01-03 08:00:00
End                       2024-04-24 00:00:00
Duration                   2667 days 16:00:00
Exposure Time [%]                   60.555357
Equity Final [$]                158981.239998
Equity Peak [$]                 195794.658455
Return [%]                          1489.8124
Buy & Hold Return [%]             1373.222796
Return (Ann.) [%]                   24.600319
Volatility (Ann.) [%]               41.606004
Sharpe Ratio                         0.570586
Sortino Ratio                        1.170017
Calmar Ratio                         0.651737
Max. Drawdown [%]                  -36.527529
Avg. Drawdown [%]                   -6.064726
Max. Drawdown Duration      927 days 13:20:00
Avg. Drawdown Duration       51 days 10:40:00
# Trades                             7.666667
Win Rate [%]                        46.548822
Best Trade [%]                     201.261964
Worst Trade [%]                    -11.815102
Avg. Trade [%]                    



[`Backtest.plot()`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest.plot)
method provides the same insights in a more visual form.

In [None]:
bt.plot()

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  fig = gridplot(
  fig = gridplot(


## Optimization

 optimize the two parameters by calling
[`Backtest.optimize()`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest.optimize)


In [None]:
%%time

# stats = bt.optimize(n1=range(5, 30, 5),
#                     n2=range(200, 250, 10),
#                     n3 = range(100, 110, 1),
#                     maximize='Equity Final [$]',
#                     constraint=lambda param: param.n1 < param.n2)
stats = bt.optimize(n1=range(5, 30, 5),
                             n2=range(20, 250, 10),
                              maximize='Equity Final [$]',
                              constraint=lambda param: param.n1 < param.n2)
stats

Backtest.optimize:   0%|          | 0/3 [00:00<?, ?it/s]

CPU times: user 251 ms, sys: 73.4 ms, total: 324 ms
Wall time: 12.6 s


Start                     2016-01-04 00:00:00
End                       2024-04-23 00:00:00
Duration                   3032 days 00:00:00
Exposure Time [%]                   79.904306
Equity Final [$]                 21871.193909
Equity Peak [$]                  22676.416443
Return [%]                         118.711939
Buy & Hold Return [%]              190.531291
Return (Ann.) [%]                    9.895483
Volatility (Ann.) [%]               17.465497
Sharpe Ratio                         0.566573
Sortino Ratio                        0.880712
Calmar Ratio                         0.584704
Max. Drawdown [%]                  -16.923917
Avg. Drawdown [%]                   -1.503866
Max. Drawdown Duration      841 days 00:00:00
Avg. Drawdown Duration       24 days 00:00:00
# Trades                                   11
Win Rate [%]                        45.454545
Best Trade [%]                      45.711834
Worst Trade [%]                     -3.383317
Avg. Trade [%]                    

Check`stats['_strategy']`

In [None]:
stats._strategy

<Strategy AboveSmaAndLY1(n1=5,n2=180)>

In [None]:
bts['NVDA'].plot(plot_volume=False, plot_pl=False)

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  fig = gridplot(
  fig = gridplot(


Strategy optimization managed to up its initial performance _on in-sample data_ by almost 50% and even beat simple
[buy & hold](https://en.wikipedia.org/wiki/Buy_and_hold).
In real life optimization, however, do **take steps to avoid
[overfitting](https://en.wikipedia.org/wiki/Overfitting)**.

## Trade data

In addition to backtest statistics returned by
[`Backtest.run()`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest.run)
shown above, you can look into _individual trade returns_ and the changing _equity curve_ and _drawdown_ by inspecting the last few, internal keys in the result series.

In [None]:
stats.tail()

Expectancy [%]                                            14.702499
SQN                                                        1.231193
_strategy                                AboveSmaAndLY(n1=5,n2=190)
_equity_curve                        Equity  DrawdownPct Drawdow...
_trades               Size  EntryBar  ExitBar  EntryPrice   Exit...
dtype: object

The columns should be self-explanatory.

In [None]:
starr['NVDA']['_equity_curve']  # Contains equity/drawdown curves. DrawdownDuration is only defined at ends of DD periods.

Unnamed: 0,Equity,DrawdownPct,DrawdownDuration
2016-01-04,10000.000000,0.000000,NaT
2016-01-05,10000.000000,0.000000,NaT
2016-01-06,10000.000000,0.000000,NaT
2016-01-07,10000.000000,0.000000,NaT
2016-01-08,10000.000000,0.000000,NaT
...,...,...,...
2024-04-17,252202.130066,0.115400,NaT
2024-04-18,254110.143982,0.108708,NaT
2024-04-19,228697.137390,0.197844,NaT
2024-04-22,238651.135193,0.162931,NaT


In [None]:
stats = starr['NVDA']
stats['_trades'].tail(50)  # Contains individual trade data

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,1236,2,6,8.0875,7.5525,-661.259811,-0.066151,2016-01-06,2016-01-12,6 days
1,1194,34,702,7.8175,59.982498,62285.007677,6.672849,2016-02-23,2018-10-16,966 days
2,1,900,903,42.285,38.450001,-3.834999,-0.090694,2019-08-01,2019-08-06,5 days
3,1656,893,903,43.224998,38.450001,-7907.39621,-0.110468,2019-07-23,2019-08-06,14 days
4,1474,924,1305,43.217499,129.5,127180.406799,1.996471,2019-09-05,2021-03-11,553 days
5,1335,1326,1555,142.897507,213.089996,93706.973648,0.491209,2021-04-12,2022-03-08,330 days
6,1141,1576,1580,249.339996,225.369995,-27349.771393,-0.096134,2022-04-06,2022-04-12,6 days
7,1500,1751,1754,171.460007,160.639999,-16230.010986,-0.063105,2022-12-15,2022-12-20,5 days
8,1334,1775,2089,180.639999,807.690002,836484.704071,3.471269,2023-01-23,2024-04-23,456 days


Learn more by exploring further
[examples](https://kernc.github.io/backtesting.py/doc/backtesting/index.html#tutorials)
or find more framework options in the
[full API reference](https://kernc.github.io/backtesting.py/doc/backtesting/index.html#header-submodules).