<a href="https://colab.research.google.com/github/tleitch/HFstrategies21/blob/main/assignments/assignment%202.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Backtesting an ML strategy with Backtrader

## Imports & Settings

In [13]:
import warnings
# Suppress every warning for the rest of the session (this also hides
# deprecation warnings from the libraries imported below).
warnings.filterwarnings('ignore')

In [14]:
!pip install backtrader
!pip install git+https://github.com/quantopian/pyfolio
!pip install yfinance
!pip install --upgrade tables
%matplotlib inline

from pathlib import Path
import csv
from time import time
import datetime
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import seaborn as sns

import backtrader as bt
from backtrader.feeds import PandasData

import pyfolio as pf

import bs4 as bs
import requests
import yfinance as yf
import datetime
from datetime import date

Collecting git+https://github.com/quantopian/pyfolio
  Cloning https://github.com/quantopian/pyfolio to /tmp/pip-req-build-_918re41
  Running command git clone -q https://github.com/quantopian/pyfolio /tmp/pip-req-build-_918re41
Building wheels for collected packages: pyfolio
  Building wheel for pyfolio (setup.py) ... [?25l[?25hdone
  Created wheel for pyfolio: filename=pyfolio-0.9.2+75.g4b901f6-cp37-none-any.whl size=75776 sha256=be80a8135aedb3e5cda4fe1feb153f631e1ec774e071be4d623e7288a54b2271
  Stored in directory: /tmp/pip-ephem-wheel-cache-orvz57zt/wheels/62/7d/a7/3e462442ba7d63c35414176627c886340521dc3dbc0893ce9f
Successfully built pyfolio
Requirement already up-to-date: tables in /usr/local/lib/python3.7/dist-packages (3.6.1)


In [31]:
# Print wide DataFrames on one line instead of wrapping them.
pd.set_option('display.expand_frame_repr', False)
# Seed NumPy's global RNG so the random `predicted` values drawn with
# np.random.uniform further down are reproducible.
np.random.seed(42)
sns.set_style('darkgrid')

In [32]:
def format_time(t):
    """Format a duration given in seconds as a zero-padded 'HH:MM:SS' string.

    The duration is rounded to whole seconds *before* it is split into
    hours, minutes and seconds. Rounding each field separately (as a plain
    `.0f` format does) can produce an invalid seconds field, e.g. 59.6 s
    would be rendered as '00:00:60' instead of '00:01:00'.

    Parameters
    ----------
    t : int or float
        Elapsed time in seconds.

    Returns
    -------
    str
        The duration as 'HH:MM:SS' (hours grow beyond two digits if needed).
    """
    total_seconds = int(round(t))
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d}'

## Backtrader Setup

### Custom Commission Scheme

In [33]:
class FixedCommisionScheme(bt.CommInfoBase):
    """
    Demo commission scheme: a fixed charge per unit traded
    """
    params = (
        ('commission', .02),
        ('stocklike', True),
        ('commtype', bt.CommInfoBase.COMM_FIXED),
    )

    def _getcommission(self, size, price, pseudoexec):
        """Return the commission for an order of `size` units.

        The charge is the absolute order size times the `commission`
        parameter; `price` and `pseudoexec` are accepted but not used.
        """
        units = abs(size)
        return units * self.p.commission

### DataFrame Loader

In [34]:
# Price/volume column names; SignalData extends this list with 'predicted'.
OHLCV = ['open', 'high', 'low', 'close', 'volume']

In [35]:
class SignalData(PandasData):
    """
    pandas DataFrame feed carrying the OHLCV columns plus a `predicted` column
    """
    # columns that are exposed as lines of the feed
    cols = [*OHLCV, 'predicted']

    # one line per column
    lines = tuple(cols)

    # every column is given -1 and 'datetime' is given None; per the
    # backtrader PandasData docs, -1 asks for the column to be autodetected
    # and None for datetime means the DataFrame index is used
    params = tuple(dict.fromkeys(cols, -1).items()) + (('datetime', None),)

### Strategy

In [48]:

class MLStrategy(bt.Strategy):
    """Long/short strategy driven by the `predicted` line of each data feed.

    On every bar the feeds with a positive prediction are ranked as long
    candidates and those with a negative prediction as short candidates.
    Up to `n_positions` names are targeted on each side, positions that are
    no longer targeted are closed, and every action is appended to a CSV log.

    Parameters (backtrader `params`)
    --------------------------------
    n_positions : int
        Maximum number of long and of short positions; also used to size
        each position (target weight is +/- 1 / max(n_positions, n_side)).
    min_positions : int
        Minimum number of candidates required on *both* sides; otherwise no
        longs or shorts are targeted for that bar.
    verbose : bool
        When true, order executions and cancellations/rejections are logged.
    log_file : str
        Path of the CSV file that log rows are appended to.
    """
    params = (('n_positions', 10),
              ('min_positions', 5),
              ('verbose', False),
              ('log_file', 'backtest.csv'))

    def log(self, txt, dt=None):
        """Append one row to the CSV log.

        `txt` is split on commas into separate CSV fields and prefixed with
        the ISO timestamp of `dt` (defaults to the current bar of the first
        data feed).
        """
        dt = dt or self.datas[0].datetime.datetime(0)
        # newline='' is what the csv module recommends for files handed to
        # csv.writer; it prevents extra blank rows on some platforms.
        with Path(self.p.log_file).open('a', newline='') as f:
            log_writer = csv.writer(f)
            log_writer.writerow([dt.isoformat()] + txt.split(','))

    def notify_order(self, order):
        """Log completed and canceled/margin/rejected orders when verbose."""
        if order.status in [order.Submitted, order.Accepted]:
            return

        # Check if an order has been completed
        # broker could reject order if not enough cash
        if self.p.verbose:
            if order.status in [order.Completed]:
                p = order.executed.price
                if order.isbuy():
                    self.log(f'{order.data._name},BUY executed,{p:.2f}')
                elif order.issell():
                    self.log(f'{order.data._name},SELL executed,{p:.2f}')

            elif order.status in [order.Canceled, order.Margin, order.Rejected]:
                self.log(f'{order.data._name},Order Canceled/Margin/Rejected')

    # bt calls prenext instead of next unless
    # all datafeeds have current values
    # => call next to avoid duplicating logic
    def prenext(self):
        self.next()

    def next(self):
        """Rebalance the long/short book for the current bar."""
        today = self.datas[0].datetime.date()
        # Optional weekday filter (disabled): uncomment to trade only on
        # Mondays (weekday 0) and Thursdays (weekday 3).
        # if today.weekday() not in [0, 3]:
        #     return
        positions = [d._name for d, pos in self.getpositions().items() if pos]

        # collect today's predictions, split by sign (zero is ignored)
        up, down = {}, {}
        for data in self.datas:
            if data.datetime.date() == today:
                signal = data.predicted[0]
                if signal > 0:
                    up[data._name] = signal
                elif signal < 0:
                    down[data._name] = signal

        # rank the feed names by prediction: most negative first for shorts,
        # most positive first for longs; each result is a list of names
        shorts = sorted(down, key=down.get)[:self.p.n_positions]
        longs = sorted(up, key=up.get, reverse=True)[:self.p.n_positions]
        n_shorts, n_longs = len(shorts), len(longs)

        # only take positions if at least min_positions longs and shorts
        if n_shorts < self.p.min_positions or n_longs < self.p.min_positions:
            longs, shorts = [], []

        # close every open position that is no longer targeted
        targeted = set(longs) | set(shorts)
        for ticker in positions:
            if ticker not in targeted:
                self.order_target_percent(data=ticker, target=0)
                self.log(f'{ticker},CLOSING ORDER CREATED')

        short_target = -1 / max(self.p.n_positions, n_shorts)
        long_target = 1 / max(self.p.n_positions, n_longs)
        for ticker in shorts:
            self.order_target_percent(data=ticker, target=short_target)
            # f-string so the ticker name (not the literal '{ticker}') is logged
            self.log(f'{ticker},SHORT ORDER CREATED')
        for ticker in longs:
            self.order_target_percent(data=ticker, target=long_target)
            self.log(f'{ticker},LONG ORDER CREATED')

`MLStrategy.next` includes a commented-out option to trade only on certain weekdays (the `today.weekday()` check at the top of the method).

In [49]:
# assignment 2 -- This is how you decide to go long using a simple moving average
# You can change the fast and slow moving averages here 

class SmaCrossLong(bt.Strategy):
    """Long-only SMA crossover strategy.

    Enters long when the fast moving average crosses the slow one to the
    upside and closes the position on a cross to the downside.
    """
    # configurable parameters of the strategy
    params = dict(
        pfast=10,  # look-back period of the fast moving average
        pslow=30   # look-back period of the slow moving average
    )

    def __init__(self):
        fast_sma = bt.ind.SMA(period=self.p.pfast)
        slow_sma = bt.ind.SMA(period=self.p.pslow)
        # crossover signal of the fast average versus the slow average
        self.crossover = bt.ind.CrossOver(fast_sma, slow_sma)

    def next(self):
        if self.position:
            # in the market: leave on a cross to the downside
            if self.crossover < 0:
                self.close()
            return

        # not in the market: enter long on a cross to the upside
        if self.crossover > 0:
            self.buy()

In [37]:
# assignment 2 -- This is how you decide to go short using a simple moving average
# You can change the fast and slow moving averages here 

class SmaCrossShort(bt.Strategy):
    """Short-only SMA crossover strategy.

    Enters short when the fast moving average crosses the slow one to the
    downside and closes the position on a cross to the upside.
    """
    # configurable parameters of the strategy
    params = dict(
        pfast=8,  # look-back period of the fast moving average
        pslow=35   # look-back period of the slow moving average
    )

    def __init__(self):
        fast_sma = bt.ind.SMA(period=self.p.pfast)
        slow_sma = bt.ind.SMA(period=self.p.pslow)
        # crossover signal of the fast average versus the slow average
        self.crossover = bt.ind.CrossOver(fast_sma, slow_sma)

    def next(self):
        if self.position:
            # in the market: close the short position on a cross to the upside
            if self.crossover > 0:
                self.close()
            return

        # not in the market: enter short on a cross to the downside
        if self.crossover < 0:
            self.sell()

### Create and Configure Cerebro Instance

In [50]:
cerebro = bt.Cerebro()  # create a "Cerebro" instance
# Register both SMA crossover strategies on the same Cerebro instance.
cerebro.addstrategy(SmaCrossLong)
cerebro.addstrategy(SmaCrossShort)
cash = 10000  # starting cash handed to the broker below
comminfo = FixedCommisionScheme()
cerebro.broker.addcommissioninfo(comminfo)  # use the fixed commission scheme
cerebro.broker.setcash(cash)

### Add input data

In [51]:
# Assignment 2 -- Here is where you add/choose tickers - data comes from Yahoo Finance
# NOTE(review): every run of this cell adds the feeds to `cerebro` again;
# re-create `cerebro` (setup cell above) before re-running it.
# NOTE(review): Facebook's ticker changed from FB to META in 2022 -- confirm
# that "FB" still returns data.
tickers = ["AAPL","AMZN","FB"]
for ticker in tickers:
    # price history for the ticker between the start and end dates
    dataframe = yf.download(tickers=ticker,start="2020-01-01", end="2020-12-31")
    # placeholder signal: one uniform random value in [-1, 1) per row,
    # standing in for a real model prediction
    dataframe["predicted"] = np.random.uniform(-1,1,size=len(dataframe))
    dataframe.index.name = 'datetime'
    bt_data = SignalData(dataname=dataframe)
    # register the feed under the ticker symbol
    cerebro.adddata(bt_data, name=ticker)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


### Run Strategy Backtest

In [52]:
# Assignment 2 -- Run strategy. Look at changing n_positions and min_positions as you add tickers
# The PyFolio analyzer is retrieved by its name ('pyfolio') after the run.
cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')
# MLStrategy is added on top of the two SMA strategies registered earlier.
cerebro.addstrategy(MLStrategy, n_positions=3, min_positions=0, 
                    verbose=True, log_file='bt_log.csv')
start = time()
results = cerebro.run()
ending_value = cerebro.broker.getvalue()
duration = time() - start

print(f'Final Portfolio Value: {ending_value:,.2f}')
print(f'Duration: {format_time(duration)}')

Final Portfolio Value: 13,514.11
Duration: 00:00:01


### Plot result

Note: every datafeed is plotted separately, so consider skipping this cell when a large number of datafeeds is loaded.

In [41]:
# The call returns nested lists of matplotlib figures (see the cell output).
cerebro.plot()  # plot the results


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[[<Figure size 432x288 with 9 Axes>], [<Figure size 432x288 with 9 Axes>]]

### Get `pyfolio` inputs

In [42]:
# prepare pyfolio inputs
# NOTE(review): results[0] is the first entry returned by cerebro.run();
# with several strategies added, confirm it is the one you want to analyse.
pyfolio_analyzer = results[0].analyzers.getbyname('pyfolio')
returns, positions, transactions, gross_lev = pyfolio_analyzer.get_pf_items()

# persist each item under its own key in a single HDF5 file
returns.to_hdf('backtrader.h5', 'returns')
positions.to_hdf('backtrader.h5', 'positions')
transactions.to_hdf('backtrader.h5', 'transactions/')
gross_lev.to_hdf('backtrader.h5', 'gross_lev')

## Run pyfolio analysis

In [43]:
# Reload the four pyfolio inputs from the HDF5 file written in the previous
# cell, using the same keys they were stored under.
returns = pd.read_hdf('backtrader.h5', 'returns')
positions = pd.read_hdf('backtrader.h5', 'positions')
transactions = pd.read_hdf('backtrader.h5', 'transactions/')
gross_lev = pd.read_hdf('backtrader.h5', 'gross_lev')

In [44]:
# 'SP500' series from FRED, squeezed from a one-column frame to a Series.
# The window starts on 2019-12-31, presumably so the first 2020 observation
# has a previous value for pct_change -- confirm.
benchmark = web.DataReader('SP500', 'fred', start = date (2019, 12, 31), end = date (2020, 12, 31)).squeeze()
# Turn levels into period-over-period returns and localize the index to UTC
# (presumably to match the timezone of the pyfolio `returns` index -- confirm).
benchmark = benchmark.pct_change().tz_localize('UTC')

In [45]:
# Group transactions by their first index level, then sum the positive and
# the negative `value` entries of each group separately.
daily_tx = transactions.groupby(level=0)
longs = daily_tx.value.apply(lambda x: x.where(x>0).sum())
shorts = daily_tx.value.apply(lambda x: x.where(x<0).sum())

In [46]:
fig, axes = plt.subplots(ncols=2, figsize=(15, 5))

# Left panel: cumulative return of the strategy next to the benchmark.
df = returns.to_frame('Strategy').join(benchmark.to_frame('Benchmark (S&P 500)'))
df.add(1).cumprod().sub(1).plot(ax=axes[0], title='Cumulative Return')

# Right panel: summed positive/negative transaction values per group.
longs.plot(label='Long',ax=axes[1], title='Positions')
shorts.plot(ax=axes[1], label='Short')
# NOTE(review): this plots the `cash` column but labels it 'PF Value' --
# confirm which quantity is intended.
positions.cash.plot(ax=axes[1], label='PF Value')
axes[1].legend()
sns.despine()
fig.tight_layout();

<IPython.core.display.Javascript object>

In [47]:
# Full pyfolio tear sheet for the backtest; missing benchmark values (e.g.
# the first pct_change entry) are dropped before being passed in.
pf.create_full_tear_sheet(returns,
                          transactions=transactions,
                          positions=positions,
                          benchmark_rets=benchmark.dropna())

Start date,2020-01-02,2020-01-02
End date,2020-12-30,2020-12-30
Total months,12,12
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,0.0%,
Cumulative returns,0.0%,
Annual volatility,0.0%,
Sharpe ratio,,
Calmar ratio,,
Stability,0.00,
Max drawdown,0.0%,
Omega ratio,,
Sortino ratio,,
Skew,0.00,


Worst drawdown periods,Net drawdown in %,Peak date,Valley date,Recovery date,Duration
0,0.0,2020-01-02,2020-01-02,2020-01-02,1.0
1,,NaT,NaT,NaT,
2,,NaT,NaT,NaT,
3,,NaT,NaT,NaT,
4,,NaT,NaT,NaT,


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Top 10 long positions of all time,max


Top 10 short positions of all time,max


Top 10 positions of all time,max
FB,0.00%


<IPython.core.display.Javascript object>