<a href="https://colab.research.google.com/github/ahsank/StockML/blob/main/Backtest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Backtesting
=======================

This notebook uses the *backtesting.py* Python framework for [backtesting](https://www.investopedia.com/terms/b/backtesting.asp) trading strategies. See the [Quickstart](https://github.com/kernc/backtesting.py/blob/master/doc/examples/Quick%20Start%20User%20Guide.ipynb) guide.


## Data
The price DataFrame should ideally be indexed with a _datetime index_ (convert it with [`pd.to_datetime()`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.to_datetime.html));
otherwise a simple range index will do.

In [1]:
!pip install backtesting

Collecting backtesting
  Downloading Backtesting-0.3.3.tar.gz (175 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/175.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━[0m [32m122.9/175.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.5/175.5 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: backtesting
  Building wheel for backtesting (setup.py) ... [?25l[?25hdone
  Created wheel for backtesting: filename=Backtesting-0.3.3-py3-none-any.whl size=173916 sha256=6606ecc94bc7fabfbc43e3f41b0592a195c6b9a5e131c31f8c883e4572c1d202
  Stored in directory: /root/.cache/pip/wheels/e2/30/7f/19cbe31987c6ebdb47f1f510343249066711609e3da2d57176
Successfully built backtesting
Installing collected packages: backtesting
Successfully installed backtesting

In [2]:
!pip install yahoo_fin

Collecting yahoo_fin
  Downloading yahoo_fin-0.8.9.1-py3-none-any.whl (10 kB)
Collecting requests-html (from yahoo_fin)
  Downloading requests_html-0.10.0-py3-none-any.whl (13 kB)
Collecting feedparser (from yahoo_fin)
  Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting sgmllib3k (from feedparser->yahoo_fin)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyquery (from requests-html->yahoo_fin)
  Downloading pyquery-2.0.0-py3-none-any.whl (22 kB)
Collecting fake-useragent (from requests-html->yahoo_fin)
  Downloading fake_useragent-1.5.1-py3-none-any.whl (17 kB)
Collecting parse (from requests-html->yahoo_fin)
  Downloading parse-1.20.1-py2.py3-none-any.whl (20 kB)
Collecting bs4 (from requests-html->yahoo_fin)
  Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Collecting w3lib (from requ

In [3]:
from yahoo_fin import stock_info
# Download the price history for the traded ticker and for SPY.
# `ticker` is the single place to change the traded symbol; it was previously
# assigned but ignored in favour of a duplicated 'ARKK' literal.
ticker = 'ARKK'
arkk = stock_info.get_data(ticker)
spy = stock_info.get_data('SPY')


In [5]:
# Title-case the column names of both frames
# (presumably to match the Open/High/Low/Close/Volume names backtesting.py expects -- confirm).
arkk.columns = arkk.columns.str.title()
spy.columns = spy.columns.str.title()
# Disabled alternative: use the Adjclose column as Close.
# df.Close = df.Adjclose
# df.drop('Adjclose', axis=1, inplace=True)
spy

Unnamed: 0,Open,High,Low,Close,Adjclose,Volume,Ticker
1993-01-29,43.968750,43.968750,43.750000,43.937500,24.763733,1003200,SPY
1993-02-01,43.968750,44.250000,43.968750,44.250000,24.939850,480500,SPY
1993-02-02,44.218750,44.375000,44.125000,44.343750,24.992697,201300,SPY
1993-02-03,44.406250,44.843750,44.375000,44.812500,25.256908,529400,SPY
1993-02-04,44.968750,45.093750,44.468750,45.000000,25.362574,531500,SPY
...,...,...,...,...,...,...,...
2024-04-17,506.049988,506.220001,499.119995,500.549988,500.549988,75910300,SPY
2024-04-18,501.980011,504.130005,498.559998,499.519989,499.519989,74548100,SPY
2024-04-19,499.440002,500.459991,493.859985,495.160004,495.160004,102129100,SPY
2024-04-22,497.829987,502.380005,495.429993,499.720001,499.720001,67763400,SPY


In [6]:
import pandas as pd


def SMA(values, n):
    """
    Simple moving average of `values` over a rolling window of `n` entries.

    Returns a pandas Series of the same length as `values`; positions that do
    not yet have `n` entries behind them are NaN.
    """
    series = pd.Series(values)
    window = series.rolling(n)
    return window.mean()

In [7]:
def ToSeries(values):
    """Wrap `values` in a pandas Series and return it."""
    series = pd.Series(values)
    return series

In [8]:
from backtesting import Strategy
from backtesting.lib import crossover


class SmaCross(Strategy):
    """Moving-average crossover strategy.

    Buys when the `n1`-bar SMA of Close crosses above the `n2`-bar SMA and
    sells on the opposite cross, closing the open position first either way.
    """

    # Moving-average window lengths, kept as class variables so they can be
    # varied later during optimization.
    n1 = 10
    n2 = 20

    def init(self):
        # Register both moving averages of Close as indicators up front.
        close = self.data.Close
        self.sma1 = self.I(SMA, close, self.n1)
        self.sma2 = self.I(SMA, close, self.n2)

    def next(self):
        fast, slow = self.sma1, self.sma2

        # sma1 moved above sma2: close the open position, then buy.
        if crossover(fast, slow):
            self.position.close()
            self.buy()
            return

        # sma2 moved above sma1: close the open position, then sell.
        if crossover(slow, fast):
            self.position.close()
            self.sell()



In [20]:
class AboveSma(Strategy):
    """Strategy that buys when Close crosses above two moving averages.

    Class variables (kept at class level so they can be varied during
    optimization):
      n1 -- window of the first moving average of Close
      n2 -- window of the second moving average of Close
      n3 -- entry threshold on the second average, in percent of its value;
            101 means Close has to cross above 1.01 * SMA(n2)
    """

    n1 = 5
    n2 = 200
    n3 = 101

    def init(self):
        close = self.data.Close
        # Both moving averages are registered as indicators up front.
        self.sma1 = self.I(SMA, close, self.n1)
        self.sma2 = self.I(SMA, close, self.n2)
        # Close is registered as an indicator too; next() compares it with the averages.
        self.Close = self.I(ToSeries, close)

    def next(self):
        price = self.Close
        entry_scale = self.n3 / 100.0

        # Entry: Close crosses above sma1 and above the scaled sma2.
        # NOTE(review): both crosses have to happen on the same bar for the
        # condition to hold -- confirm that this is the intended signal.
        if crossover(price, self.sma1) and crossover(price, self.sma2 * entry_scale):
            self.position.close()
            self.buy()
            return

        # Exit: sma1 and the unscaled sma2 both cross above Close. The
        # position is only closed; the sell order stays disabled.
        if crossover(self.sma1, price) and crossover(self.sma2, price):
            self.position.close()
            # self.sell()

In [None]:
class CautiousSma(Strategy):
    """Strategy that buys on a double cross and exits on either single cross.

    Class variables (kept at class level so they can be varied during
    optimization):
      n1 -- window of the first moving average of Close
      n2 -- window of the second moving average of Close
    """

    n1 = 5
    n2 = 200

    def init(self):
        close = self.data.Close
        # Both moving averages are registered as indicators up front.
        self.sma1 = self.I(SMA, close, self.n1)
        self.sma2 = self.I(SMA, close, self.n2)
        # Close is registered as an indicator too; next() compares it with the averages.
        self.Close = self.I(ToSeries, close)

    def next(self):
        price = self.Close

        # Entry: Close crosses above sma1 and above sma2.
        # NOTE(review): both crosses have to happen on the same bar for the
        # condition to hold -- confirm that this is the intended signal.
        if crossover(price, self.sma1) and crossover(price, self.sma2):
            self.position.close()
            self.buy()
            return

        # Exit: either average crossing above Close is enough. The position
        # is only closed; the sell order stays disabled.
        if crossover(self.sma1, price) or crossover(self.sma2, price):
            self.position.close()
            # self.sell()

## Backtesting

See the
[`Backtest`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest) API reference.


In [28]:
from backtesting import Backtest

# Backtest AboveSma on the ARKK prices with 10,000 starting cash and no commission.
bt = Backtest(
    arkk,
    AboveSma,
    cash=10_000,
    commission=0,
)
stats = bt.run()
stats

Start                     2014-10-31 00:00:00
End                       2024-04-23 00:00:00
Duration                   3462 days 00:00:00
Exposure Time [%]                   49.370805
Equity Final [$]                 44876.906145
Equity Peak [$]                  65794.831732
Return [%]                         348.769061
Buy & Hold Return [%]              110.942117
Return (Ann.) [%]                   17.198454
Volatility (Ann.) [%]                28.68971
Sharpe Ratio                         0.599464
Sortino Ratio                        1.007051
Calmar Ratio                         0.404809
Max. Drawdown [%]                  -42.485355
Avg. Drawdown [%]                   -4.734251
Max. Drawdown Duration     1166 days 00:00:00
Avg. Drawdown Duration       43 days 00:00:00
# Trades                                   14
Win Rate [%]                             50.0
Best Trade [%]                     152.250282
Worst Trade [%]                     -6.137094
Avg. Trade [%]                    



[`Backtest.plot()`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest.plot)
method provides the same insights in a more visual form.

In [17]:
# Plot the backtest result.
bt.plot()

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  fig = gridplot(
  fig = gridplot(


## Optimization

Optimize the strategy parameters (`n1`, `n2` and `n3`) by calling
[`Backtest.optimize()`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest.optimize).


In [22]:
%%time

stats = bt.optimize(n1=range(5, 30, 5),
                    n2=range(100, 250, 10),
                    n3 = range(100, 110, 1),
                    maximize='Equity Final [$]',
                    constraint=lambda param: param.n1 < param.n2)
stats

  output = _optimize_grid()


Backtest.optimize:   0%|          | 0/3 [00:00<?, ?it/s]

CPU times: user 1.16 s, sys: 137 ms, total: 1.29 s
Wall time: 2min 35s


Start                     1993-01-29 00:00:00
End                       2024-04-23 00:00:00
Duration                  11407 days 00:00:00
Exposure Time [%]                   67.293998
Equity Final [$]                152833.701523
Equity Peak [$]                 170733.174698
Return [%]                        1428.337015
Buy & Hold Return [%]             1041.644348
Return (Ann.) [%]                    9.130971
Volatility (Ann.) [%]                13.57979
Sharpe Ratio                         0.672394
Sortino Ratio                        1.044325
Calmar Ratio                         0.470642
Max. Drawdown [%]                  -19.401092
Avg. Drawdown [%]                    -1.74346
Max. Drawdown Duration     1167 days 00:00:00
Avg. Drawdown Duration       29 days 00:00:00
# Trades                                   50
Win Rate [%]                             60.0
Best Trade [%]                      64.683664
Worst Trade [%]                     -4.931444
Avg. Trade [%]                    

Check `stats['_strategy']` to see the parameters of the selected strategy.

In [23]:
# Strategy instance, with its parameters, stored in the result series.
stats['_strategy']

<Strategy AboveSma(n1=5,n2=200,n3=101)>

In [24]:
# Plot again with volume and P/L plotting turned off.
bt.plot(plot_volume=False, plot_pl=False)

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  fig = gridplot(
  fig = gridplot(


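In the recorded run above, the optimizer settled on `n1=5, n2=200, n3=101` — the same values the strategy already uses as defaults — so it did not improve on the initial parameters; _on in-sample data_ the strategy still beat simple
[buy & hold](https://en.wikipedia.org/wiki/Buy_and_hold) (a return of about 1428% versus 1042%).
In real-life optimization, however, do **take steps to avoid
[overfitting](https://en.wikipedia.org/wiki/Overfitting)**.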

## Trade data

In addition to backtest statistics returned by
[`Backtest.run()`](https://kernc.github.io/backtesting.py/doc/backtesting/backtesting.html#backtesting.backtesting.Backtest.run)
shown above, you can look into _individual trade returns_ and the changing _equity curve_ and _drawdown_ by inspecting the last few, internal keys in the result series.

In [25]:
# Last five entries of the result series, including the internal `_`-prefixed keys.
stats.tail()

Expectancy [%]                                             6.436903
SQN                                                         2.31614
_strategy                              AboveSma(n1=5,n2=200,n3=101)
_equity_curve                        Equity  DrawdownPct Drawdow...
_trades               Size  EntryBar  ExitBar  EntryPrice   Exit...
dtype: object

The columns should be self-explanatory.

In [26]:
# Equity and drawdown curves. DrawdownDuration is only defined at the ends of
# drawdown periods.
stats['_equity_curve']

Unnamed: 0,Equity,DrawdownPct,DrawdownDuration
1993-01-29,10000.000000,0.000000,NaT
1993-02-01,10000.000000,0.000000,NaT
1993-02-02,10000.000000,0.000000,NaT
1993-02-03,10000.000000,0.000000,NaT
1993-02-04,10000.000000,0.000000,NaT
...,...,...,...
2024-04-17,152833.701523,0.104839,NaT
2024-04-18,152833.701523,0.104839,NaT
2024-04-19,152833.701523,0.104839,NaT
2024-04-22,152833.701523,0.104839,NaT


In [27]:
# Individual trade data, one row per trade.
stats['_trades']

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,213,278,283,46.84375,47.03125,39.9375,0.004003,1994-03-07,1994-03-14,7 days
1,213,283,350,47.03125,45.546875,-316.171875,-0.031561,1994-03-14,1994-06-20,98 days
2,208,396,442,46.6875,46.765625,16.25,0.001673,1994-08-24,1994-10-28,65 days
3,208,442,450,46.765625,47.328125,117.0,0.012028,1994-10-28,1994-11-09,12 days
4,208,450,454,47.328125,46.796875,-110.5,-0.011225,1994-11-09,1994-11-15,6 days
5,208,454,877,46.796875,64.28125,3636.75,0.373623,1994-11-15,1996-07-19,612 days
6,208,877,884,64.28125,63.4375,-175.5,-0.013126,1996-07-19,1996-07-30,11 days
7,199,912,1402,66.1875,109.0,8519.6875,0.646837,1996-09-09,1998-08-18,708 days
8,199,1402,1450,109.0,107.71875,-254.96875,-0.011755,1998-08-18,1998-10-26,69 days
9,194,1454,1796,110.125,140.1875,5832.125,0.272985,1998-10-30,2000-03-10,497 days


Learn more by exploring further
[examples](https://kernc.github.io/backtesting.py/doc/backtesting/index.html#tutorials)
or find more framework options in the
[full API reference](https://kernc.github.io/backtesting.py/doc/backtesting/index.html#header-submodules).