# Backtesting with backtrader library
Notes:  
Timestamp must be in the exact format specified for datafeed   **Timestamp needs to be first column index**  
Params for datafeed need to be specified as tuple of tuples  
Within a strategy, data is accessed via self.datas[0]; for example, self.datas[0].high_delta[0] gets you the current time period's high_delta (index 0 is the current bar, -1 the previous one)  
The current bar's data has already happened and cannot be used to execute an order (e.g. you cannot look at the current close price and simultaneously buy at the current close; the order will be executed at the open instead). Orders are executed on the following day.

In [3]:
import backtrader as bt
import datetime
import pandas as pd

## Define Strategies and Data utils

In [4]:
class BasicStrategy(bt.Strategy):
    """All-in / all-out strategy driven by the data feed's ``prediction`` line.

    While flat, it buys with all available cash through a market order. While
    long, it places a limit sell at the predicted price when the prediction is
    at or above the current close, otherwise a market sell. At most one
    working order is kept: a still-pending order is cancelled at the start of
    each bar before a new one is created.
    """

    def __init__(self):
        # To keep track of pending orders
        self.order = None
        # Share count of the most recent buy; reused when selling
        self.size = 0
        print('***The limit sells for this strategy are valid for 1 day***')

    def log(self, txt, dt=None):
        ''' Logging function for this strategy'''
        dt = dt or self.datas[0].datetime.date(0)
        print('%s, %s' % (dt.isoformat(), txt))

    def notify_order(self, order):
        """Log order outcomes and release the pending-order tracker.

        Args:
            order: the order whose status changed.
        """
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status == order.Completed:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

        # Release the tracker only when the finished order is the tracked one.
        # Compare by ref rather than identity (notifications may carry a copy
        # of the order), so a late notification for an order cancelled in
        # next() cannot wipe the order that replaced it.
        if self.order is not None and order.ref == self.order.ref:
            self.order = None

    def next(self):
        """Create this bar's single order based on position and prediction."""
        data = self.datas[0]
        self.log(f'Current Portfolio Value : {self.broker.get_value()}')

        # cancel if there is an order pending, this strategy should have 1 working order per day
        if self.order is not None:
            self.log('ORDER CANCELLED')
            self.cancel(self.order)
            self.order = None

        # Check if we are in the market
        if not self.position:
            # BUY: size the order with the next bar's open
            try:
                self.size = int(self.broker.get_cash() / data.open[1])
            except (IndexError, ZeroDivisionError, ValueError):
                # No next bar (end of data) or an unusable open price: there
                # is nothing sensible to buy, so do not place an order.
                print('Size Exception. If at the end of data, ignore.')
                return
            self.log(f'MARKET BUY CREATE {self.size} shares at next open, current close price: {data.close[0]}')
            # market order buys at next open
            self.order = self.buy(size=self.size)

        elif data.prediction[0] >= data.close[0]:
            # place sell order at predicted high if predicted high is greater than current close price
            # TODO: Make prediction and close a filter in the class constructor (more optimal)
            self.log(f'LIMIT SELL CREATE {self.size} shares at {data.prediction[0]}')
            # NOTE(review): `valid` is two calendar days ahead while the banner
            # printed in __init__ says 1 day - confirm the intended lifetime.
            self.order = self.sell(exectype=bt.Order.Limit,
                                   price=data.prediction[0],
                                   valid=data.datetime.date(0) + datetime.timedelta(days=2),
                                   size=self.size)
        else:
            # if prediction is less than current value sell at open (ASAP)
            self.log('MARKET SELL CREATE. PREDICTION < CURRENT CLOSE')
            self.order = self.sell(size=self.size)
        

In [5]:
class BuyAndHold(bt.Strategy):
    """Benchmark strategy: spend all cash on a single buy and hold to the end."""

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed with ``dt`` (default: the current bar's date)."""
        stamp = dt if dt else self.datas[0].datetime.date(0)
        print('%s, %s' % (stamp.isoformat(), txt))

    def notify_order(self, order):
        """Log the fill price of completed orders; ignore every other status."""
        status = order.status
        if status in (order.Submitted, order.Accepted):
            # Still travelling to/through the broker - nothing to report yet
            return
        if status not in (order.Completed,):
            # e.g. the broker rejected the order for lack of cash
            return

        if order.isbuy():
            self.log('BUY EXECUTED, %.2f' % order.executed.price)
        elif order.issell():
            self.log('SELL EXECUTED, %.2f' % order.executed.price)

        # Remember the bar count at which the fill was reported
        self.bar_executed = len(self)

    def start(self):
        """Record the starting cash so ``stop`` can compute the return."""
        self.val_start = self.broker.get_cash()

    def nextstart(self):
        """Buy as many whole shares as the cash covers at the next bar's open."""
        cash = self.broker.get_cash()
        next_open = self.datas[0].open[1]
        self.buy(size=int(cash / next_open))

    def stop(self):
        """Compute the return on the starting cash and print it."""
        final_value = self.broker.get_value()
        self.roi = (final_value / self.val_start) - 1.0
        print(f'Stop price: {self.datas[0].close[0]}')
        print('ROI:        {:.2f}%'.format(100.0 * self.roi))

In [6]:
def prepare_data(data, fromdate, todate, filepath):
    """Prepare data for a backtrader ``GenericCSVData`` datafeed.

    Writes ``data`` to ``filepath`` as CSV, with timestamps formatted as
    ``%Y-%m-%d %H:%M:%S``, and builds the matching feed parameters.

    Args:
        data: DataFrame with a ``timestamp`` column, assumed to be the first
            column. Every other column becomes an extra feed line. The frame
            passed in is not modified.
        fromdate: Unused; the feed's ``fromdate`` is taken from the first
            row's timestamp. Kept for interface compatibility.
        todate: Unused; the feed's ``todate`` is taken from the last row's
            timestamp. Kept for interface compatibility.
        filepath: Destination CSV path (may be a temp file that is constantly
            overwritten). Missing parent directories are created.

    Returns:
        ``(filepath, params, cols)``: the CSV path, a tuple of
        ``(name, value)`` tuples for the feed's ``params``, and the columns
        after ``timestamp``.
    """
    import os  # local import so the function does not rely on a later cell

    # Work on a copy so the caller's frame keeps its original timestamps
    frame = data.copy()
    stamps = pd.to_datetime(frame['timestamp'])
    start, end = stamps.iloc[0], stamps.iloc[-1]
    # Backtrader string format
    frame['timestamp'] = stamps.dt.strftime('%Y-%m-%d %H:%M:%S')

    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # to_csv also writes the index as CSV column 0, so timestamp is column 1
    frame.to_csv(filepath)

    # skip nonfeatures (timestamp)
    cols = frame.columns[1:]
    params = [
        ('fromdate', datetime.datetime(start.year, start.month, start.day)),
        ('todate', datetime.datetime(end.year, end.month, end.day)),
        ('nullvalue', 0.0),
        ('dtformat', ('%Y-%m-%d %H:%M:%S')),
        ('tmformat', ('%H:%M:%S')),
        ('datetime', 1),
    ]
    # Remaining columns follow in CSV order, starting at position 2
    params.extend((name, position) for position, name in enumerate(cols, start=2))

    return filepath, tuple(params), cols


## Import Data and Run Backtest

In [7]:
import os

In [9]:
# Directory the notebook runs from (abspath('') resolves to the working directory)
absolutepath = os.path.abspath('')
# One level up from the working directory
fileDirectory = os.path.split(absolutepath)[0]

# Path of parent directory (moves outside of repository)
parentDirectory = os.path.split(fileDirectory)[0]

In [11]:
fileDirectory

'/Users/chris/Documents/GitHub/financial_forecasting_analysis'

In [12]:
def _run_backtest(strategy_cls, feed_cls, csv_path, starting_cash):
    """Run one strategy over the prepared CSV and print its analyzer results.

    Returns:
        ``(cerebro, total_return)`` where ``total_return`` is the final
        portfolio value divided by ``starting_cash``, minus 1.
    """
    cerebro = bt.Cerebro(cheat_on_open=True)
    cerebro.broker.setcash(starting_cash)
    print(f'Starting Portfolio Value: {cerebro.broker.getvalue()}')
    cerebro.adddata(feed_cls(dataname=csv_path))
    cerebro.addstrategy(strategy_cls)
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, timeframe=bt.TimeFrame.Months, _name='sharpe')
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name='dd')
    cerebro.addanalyzer(bt.analyzers.Returns, _name='returns')
    results = cerebro.run()
    final_value = cerebro.broker.getvalue()
    print(f'Final Portfolio Value: {final_value}\n')
    print('STRATEGY INFO:')
    analyzers = results[0].analyzers
    print(analyzers.sharpe.get_analysis())
    print(analyzers.dd.get_analysis())
    # rtot is total log returns over the strategy time period
    print(analyzers.returns.get_analysis())
    return cerebro, final_value / starting_cash - 1


def capstone_data_prep(pred_folder_name, pred_col_name, ticker_df_filepath, starting_cash=100000):
    """Main loop, preps data and executes backtrader on all stocks in the prediction folder.

    For every prediction file under
    ``<fileDirectory>/data/ticker_predictions/<pred_folder_name>`` the ticker
    name is taken from the text before the first underscore, the matching
    ``<ticker>_full_data.csv`` is merged with the predictions on
    ``timestamp``, and both ``BuyAndHold`` and ``BasicStrategy`` are run.

    Args:
        pred_folder_name: Folder name inside ``data/ticker_predictions``.
        pred_col_name: Column of the prediction file holding the predicted
            price; it is renamed to ``prediction`` for the strategy.
        ticker_df_filepath: Directory containing ``<ticker>_full_data.csv``.
        starting_cash: Cash each backtest starts with.

    Returns:
        Dict mapping ticker name to the ``Cerebro`` instance of its
        ``BasicStrategy`` run.
    """
    pred_dir = os.path.join(fileDirectory, 'data', 'ticker_predictions', pred_folder_name)
    prep_path = 'temp/prep_data.csv'
    # The scratch CSV is overwritten for every ticker; make sure its folder exists
    os.makedirs(os.path.dirname(prep_path), exist_ok=True)

    ticker_backtesting_dict = {}
    for pred_file in os.listdir(pred_dir):
        # Hidden files (e.g. macOS .DS_Store) are not prediction files
        if pred_file.startswith('.'):
            continue

        # collect data
        ticker_name = pred_file.split('_')[0]
        pred_df = pd.read_csv(os.path.join(pred_dir, pred_file))
        ticker_df = pd.read_csv(os.path.join(ticker_df_filepath, f'{ticker_name}_full_data.csv'))

        # make timestamp in column position 0, SPECIFIC TO FULL DATA IN FILE
        ticker_df = ticker_df.rename({'reportperiod':'timestamp'}, axis=1)
        ticker_cols = list(ticker_df.columns)
        ticker_cols[5] = 'ts_cpy'
        ticker_cols[0] = 'timestamp'
        ticker_df.columns = ticker_cols
        ticker_df['timestamp'] = ticker_df['ts_cpy']
        # merge prediction data with full data, on key = timestamp
        ticker_df = ticker_df.merge(pred_df.loc[:, ['timestamp', pred_col_name]], on='timestamp')
        # new prediction column name = "prediction", important because backtester strategy looks for this column name
        ticker_df = ticker_df.rename({pred_col_name:'prediction'}, axis=1)
        # remove all non-prediction rows in prediction field
        ticker_df = ticker_df[ticker_df.prediction > 0]
        if ticker_df.empty:
            print(f'Skipping {ticker_name}: no rows with a positive prediction')
            continue
        # skip nonfeatures, keep timestamp, SPECIFIC TO FULL DATA IN FILE
        nonfeatures = ticker_df.columns[1:7]
        features = [col for col in ticker_df.columns if col not in nonfeatures]
        ticker_df = ticker_df[features]
        ticker_df.columns = [c.lower() for c in ticker_df.columns]
        # Start and end dates for trading: first/last remaining row. Read them
        # from the merged frame itself, because its row labels do not line up
        # with pred_df's rows.
        start_date = ticker_df['timestamp'].iloc[0]
        end_date = ticker_df['timestamp'].iloc[-1]

        # Prepare data for backtrader
        csv_path, feed_params, feed_lines = prepare_data(ticker_df.copy(), start_date, end_date, prep_path)

        class DataFeed(bt.feeds.GenericCSVData):
            lines = tuple(feed_lines)
            params = feed_params

        print(f"--------Ticker Name-------: {ticker_name}")
        ##### BUY AND HOLD #######
        print('BUY AND HOLD')
        _, buy_hold_ret = _run_backtest(BuyAndHold, DataFeed, csv_path, starting_cash)

        #### MAIN STRATEGY #####
        cerebro, strat_ret = _run_backtest(BasicStrategy, DataFeed, csv_path, starting_cash)
        print(f"For {ticker_name}, return was {buy_hold_ret*100}% for buy and hold vs {strat_ret*100}%  for strategy")
        print('\n'*5)

        ticker_backtesting_dict[ticker_name] = cerebro

    return ticker_backtesting_dict

In [19]:
# Backtest every ticker that has a prediction file in the LSTM_preds folder,
# reading the per-ticker price data from the data/ticker_data directory.
cerebro_dict = capstone_data_prep(
    pred_folder_name='LSTM_preds',
    pred_col_name='prediction',
    ticker_df_filepath=f'{fileDirectory}/data/ticker_data/',
)
cerebro_dict

--------Ticker Name-------: MSFT
BUY AND HOLD
Starting Portfolio Value: 100000
2019-09-27, BUY EXECUTED, 140.15
Stop price: 335.95001220703125
ROI:        139.61%
Final Portfolio Value: 239605.41305541992

STRATEGY INFO:
OrderedDict([('sharperatio', 0.5797021770069253)])
AutoOrderedDict([('len', 1), ('drawdown', 0.3085141850586886), ('moneydown', 741.5043334960938), ('max', AutoOrderedDict([('len', 98), ('drawdown', 28.219971018038613), ('moneydown', 37988.63912963867)]))])
OrderedDict([('rtot', 0.8738232720490078), ('ravg', 0.001630267298598895), ('rnorm', 0.5080649806090113), ('rnorm100', 50.80649806090113)])
Starting Portfolio Value: 100000
***The limit sells for this strategy are valid for 1 day***
2019-09-26, Current Portfolio Value : 100000.0
2019-09-26, MARKET BUY CREATE 713 shares at next open, current close price: 139.5399932861328
2019-09-27, BUY EXECUTED, 140.15
2019-09-27, Current Portfolio Value : 98274.54130554199
2019-09-27, LIMIT SELL CREATE 713 shares at 139.0926422989

{'MSFT': <backtrader.cerebro.Cerebro at 0x140ffdc70>,
 'HD': <backtrader.cerebro.Cerebro at 0x127678910>,
 'WAT': <backtrader.cerebro.Cerebro at 0x12752caf0>,
 'ADSK': <backtrader.cerebro.Cerebro at 0x140729190>,
 'UNH': <backtrader.cerebro.Cerebro at 0x140af2ac0>,
 'XOM': <backtrader.cerebro.Cerebro at 0x17710fa30>}

In [20]:
cerebro_dict

{'MSFT': <backtrader.cerebro.Cerebro at 0x140ffdc70>,
 'HD': <backtrader.cerebro.Cerebro at 0x127678910>,
 'WAT': <backtrader.cerebro.Cerebro at 0x12752caf0>,
 'ADSK': <backtrader.cerebro.Cerebro at 0x140729190>,
 'UNH': <backtrader.cerebro.Cerebro at 0x140af2ac0>,
 'XOM': <backtrader.cerebro.Cerebro at 0x17710fa30>}

### Library additionally has plotting
Not sure how to reformat this, it's pretty ugly

In [17]:
import matplotlib.pyplot as plt

# Larger figure and font for readability, then plot the UNH backtest.
# NOTE(review): the recorded output shows this call failing with an ImportError
# raised from matplotlib.dates - presumably a backtrader/matplotlib version
# mismatch; confirm the installed versions.
plt.rcParams.update({'figure.figsize': [10, 8], 'font.size': 12})
cerebro_dict['UNH'].plot()

ImportError: cannot import name 'warnings' from 'matplotlib.dates' (/opt/homebrew/Caskroom/miniforge/base/envs/other/lib/python3.8/site-packages/matplotlib/dates.py)

### Ways to Improve:

Increase strategy trading volume. There are certain periods where sell orders do not go through for multiple days in a row, meaning that the price is lower than the model anticipated for that period. For example, if the last sell order didn't go through, we can find a way to use that information to inform our next sell order and make the prediction lower or try a market sell. Maybe train a "bear" model, one that only knows the pain and suffering of bear markets, and only produces pessimistic high price predictions.   

Smarter entry points. Currently, buy orders are being executed at market price at the open. Returns may increase if entry constraints are increased.  

Resizing orders. Orders for this strategy baseline are currently 100% in or 100% out of position. This has benefits if you want less exposure to the long-term movement of the stocks. Depending on the situation and strategy, we might want to reserve a cash position to expose ourselves less to short-term movements.  