In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import datetime
import os.path
import backtrader as bt
from backtrader.indicators import SMA
import numpy as np
from pprint import pformat
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import datetime
import random

## Assignment
1. Create a walk-forward equity curve.  Use in-sample (IS) and out-of-sample (OOS) periods of 252 and 63 trading days respectively, 
which map to roughly 365 and 90 calendar days (an FAQ on periods vs. timeframes will follow later).

2. Average your ratios of annualized OOS/IS performance for all runs (Walk Forward Efficiency).

3. Upload your `*.ipynb` to #promotion and include screenshots of your Walk Forward Efficiency 
and Walk Forward Equity Curve.



## Strategy

In [None]:
# Create a Strategy
class TestStrategy(bt.Strategy):
    """Long-only simple-moving-average crossover strategy.

    A position is opened when the fast SMA crosses above the slow SMA while
    flat, and closed when it crosses back to or below the slow SMA.

    Params:
        fast: period of the fast SMA.
        slow: period of the slow SMA.
        optim_fs: ``(fast, slow)`` pair that overrides ``fast``/``slow`` when
            ``optim`` is true.
        optim: when true, unpack ``optim_fs`` into ``fast`` and ``slow``.
        printlog: when true, ``log`` prints every message.
    """

    params = (
        ('fast', 20),
        ('slow', 50),
        ('optim_fs', (20,50)),
        ('optim', False),
        ('printlog', False),
    )

    def log(self, txt, dt=None, doprint=False):
        """Print ``txt`` prefixed by a date when logging is enabled.

        ``dt`` defaults to the date of the current bar of the first data
        feed; ``doprint`` forces output even if ``printlog`` is off.
        """
        if not (self.params.printlog or doprint):
            return
        stamp = dt or self.datas[0].datetime.date(0)
        print('%s, %s' % (stamp.isoformat(), txt))

    def __init__(self):
        cfg = self.params
        feed = self.datas[0]

        # Shortcut to the close line of the first data feed
        self.dataclose = feed.close

        # Pending order plus price/commission of the last executed buy
        self.order = None
        self.buyprice = None
        self.buycomm = None

        # During optimisation the two windows arrive packed in one tuple
        if cfg.optim:
            cfg.fast, cfg.slow = cfg.optim_fs

        if cfg.fast > cfg.slow:
            raise ValueError(
                "A SMAC strategy cannot have the fast moving average's window be "
                "greater than the slow moving average window.")

        # Crossover signal: positive while the fast SMA is above the slow SMA
        self.fast_ma = SMA(feed, period=cfg.fast, plotname="FastMA")
        self.slow_ma = SMA(feed, period=cfg.slow, plotname="SlowMA")
        self.signal = self.fast_ma - self.slow_ma

    def notify_order(self, order):
        """Log order outcomes and clear the pending-order marker."""
        status = order.status

        # Still travelling to / sitting at the broker: nothing to do yet
        if status in (order.Submitted, order.Accepted):
            return

        if status == order.Completed:
            filled = order.executed
            fill_info = (filled.price, filled.value, filled.comm)
            if order.isbuy():
                self.log('BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % fill_info)
                self.buyprice = filled.price
                self.buycomm = filled.comm
            else:
                self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % fill_info)

            # Remember on which bar the order was filled
            self.bar_executed = len(self)
        elif status in (order.Canceled, order.Margin, order.Rejected):
            self.log('Order Canceled/Margin/Rejected')

        # Any other outcome also means no order is pending any more
        self.order = None

    def notify_trade(self, trade):
        """Log gross and net profit once a trade has been closed."""
        if trade.isclosed:
            self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' %
                     (trade.pnl, trade.pnlcomm))

    def next(self):
        """Evaluate the crossover on the current bar and place at most one order."""
        self.log('Close, %.2f' % self.dataclose[0])

        # Never stack a second order on top of a pending one
        if self.order:
            return

        if self.position:
            # In the market: leave when the signal drops to or below zero
            if self.signal[0] <= 0 and self.signal[-1] > 0:
                self.log('SELL CREATE, %.2f' % self.dataclose[0])
                self.order = self.sell()
            return

        # Flat: enter when the signal has just turned positive
        if self.signal[0] > 0 and self.signal[-1] <= 0:
            self.log('BUY CREATE, %.2f' % self.dataclose[0])
            self.order = self.buy()

    def stop(self):
        """Always print the windows used and the final broker value."""
        self.log(f"Fast: {self.params.fast}; Slow: {self.params.slow}; Val: {self.broker.getvalue()}.", doprint=True)

## Analyzers

In [None]:
class AcctStats(bt.Analyzer):
    """Analyzer that records account statistics for one strategy run.

    It stores the strategy's ``fast``/``slow`` parameters, the broker cash at
    start and stop, the broker value after every closed trade and the number
    of closed trades.
    """

    def __init__(self):
        strat_params = self.strategy.params
        self.fast_ma = strat_params.fast
        self.slow_ma = strat_params.slow
        # Filled in by start()/stop()/notify_trade()
        self.start_cash = None
        self.end_cash = None
        self.num_trades = None
        self.equity = []

    def start(self):
        """Capture the opening cash and reset the trade counter."""
        self.num_trades = 0
        self.start_cash = self.strategy.broker.cash

    def next(self):
        """No per-bar bookkeeping is needed."""
        pass

    def notify_trade(self, trade):
        """Record the broker value each time a trade closes."""
        if not trade.isclosed:
            return
        self.equity.append(self.strategy.broker.getvalue())
        self.num_trades += 1

    def stop(self):
        """Capture closing cash and final broker value."""
        broker = self.strategy.broker
        self.end_cash = broker.cash
        self.final_val = broker.get_value()

    def get_analysis(self):
        """Return a dict with ``params``, ``profit``, ``num_trades`` and ``trades``.

        ``trades`` holds the change in broker value between consecutive
        closed trades, the first one measured from the starting cash.
        """
        value_path = np.asarray([self.start_cash, *self.equity])
        per_trade = np.diff(value_path).tolist()

        return {
            'params': (self.fast_ma, self.slow_ma),
            'profit': self.final_val - self.start_cash,
            'num_trades': self.num_trades,
            'trades': per_trade,
        }
    

In [None]:
def best_result_from_cerebro_opti_run(result):
    """Pick the most profitable run out of an optimisation result.

    Args:
        result: iterable of per-run sequences; the first item of each must
            expose ``analyzers.custom.get_analysis()`` returning a dict with
            the keys ``params``, ``profit``, ``num_trades`` and ``trades``.

    Returns:
        Tuple ``(params, profit, num_trades, trades)`` of the run with the
        highest ``profit`` (the first one on ties).
    """
    analyses = [run[0].analyzers.custom.get_analysis() for run in result]

    # np.argmax returns the first maximum, which settles ties
    winner = analyses[np.argmax([stats['profit'] for stats in analyses])]

    best = (
        winner['params'],
        winner['profit'],
        winner['num_trades'],
        winner['trades'],
    )
    print('best:{} profit:{} trades:{}'.format(best[0], best[1], best[2]))
    return best

## Walkforward Equity Curve

In [None]:
class WalkforwardStrategy(bt.Strategy):
    """Long-only SMA crossover whose windows change per walk-forward period.

    ``walkforward`` is an iterable of dicts, each with ``'train_param'`` (a
    ``(fast, slow)`` pair of SMA periods) and ``'test_period'`` (a
    ``(start, end)`` pair of ``'%Y-%m-%d'`` strings).  Once a bar falls
    inside ``[start, end)`` of an entry, that entry's crossover signal drives
    the trading until another entry matches.

    When no ``walkforward`` list is supplied the default ``fast``/``slow``
    signal is traded.  When a list is supplied but no entry has matched yet,
    the strategy stays flat.

    Params:
        fast, slow: default SMA periods.
        optim_fs, optim: when ``optim`` is true, ``optim_fs`` is unpacked
            into ``fast`` and ``slow``.
        live: accepted for interface compatibility; not used by this class.
        walkforward: list of walk-forward entries as described above.
        printlog: when true, ``log`` prints every message.

    NOTE(review): every entry adds its own pair of SMA indicators;
    presumably backtrader delays ``next`` until the longest of them has
    warmed up, which would eat into the first test period — confirm.
    """

    params = (
        ('fast', 20),
        ('slow', 50),
        ('optim_fs', (20,50)),
        ('optim', False),
        ('live', False),
        ('walkforward', None),
        ('printlog', False),
    )

    def log(self, txt, dt=None, doprint=False):
        """Print ``txt`` prefixed by the bar date when logging is enabled."""
        if self.params.printlog or doprint:
            dt = dt or self.datas[0].datetime.date(0)
            print('%s, %s' % (dt.isoformat(), txt))

    def __init__(self):
        # Keep a reference to the "close" line in the data[0] dataseries
        self.dataclose = self.datas[0].close

        # To keep track of pending orders and buy price/commission
        self.order = None
        self.buyprice = None
        self.buycomm = None

        # When optimising, the two windows arrive packed in one tuple
        if self.params.optim:
            self.params.fast, self.params.slow = self.params.optim_fs

        if self.params.fast > self.params.slow:
            raise ValueError(
                "A SMAC strategy cannot have the fast moving average's window be " + \
                 "greater than the slow moving average window.")

        # Default moving averages (traded only when no walk-forward list is given)
        self.fast_ma = SMA(self.datas[0], period=self.params.fast, plotname="FastMA")
        self.slow_ma = SMA(self.datas[0], period=self.params.slow, plotname="SlowMA")
        self.signal = self.fast_ma - self.slow_ma

        self.current_row = None
        self.wfsma = []
        # (period_start, period_end, row) with the dates parsed once up
        # front instead of on every bar
        self._wf_windows = []

        for entry in self.params.walkforward or ():
            train_param = entry['train_param']
            fast_ma = SMA(self.datas[0], period=train_param[0])
            slow_ma = SMA(self.datas[0], period=train_param[1])
            row = {
                'fast_ma': fast_ma,
                'slow_ma': slow_ma,
                # Built from this entry's own fast AND slow averages; the
                # previous code subtracted an SMA of the default slow period.
                'signal': fast_ma - slow_ma,
                'test_period': entry['test_period'],
                'train_param': train_param,
            }
            self.wfsma.append(row)

            start, end = entry['test_period']
            self._wf_windows.append((
                datetime.datetime.strptime(start, '%Y-%m-%d'),
                datetime.datetime.strptime(end, '%Y-%m-%d'),
                row,
            ))

        print(pformat(self.wfsma))

    def notify_order(self, order):
        """Log order outcomes and clear the pending-order marker."""
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log(
                    'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                    (order.executed.price,
                     order.executed.value,
                     order.executed.comm))

                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm
            else:  # Sell
                self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                         (order.executed.price,
                          order.executed.value,
                          order.executed.comm))

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

        # Write down: no pending order
        self.order = None

    def notify_trade(self, trade):
        """Log gross and net profit once a trade has been closed."""
        if not trade.isclosed:
            return

        self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' %
                 (trade.pnl, trade.pnlcomm))

    def next(self):
        """Select the active walk-forward entry and trade its crossover."""
        # Walk-forward logic: the last entry whose half-open test period
        # contains the current bar becomes (and stays) the active one
        now = self.datetime.datetime()
        for period_start, period_end, row in self._wf_windows:
            if period_start <= now < period_end:
                self.current_row = row

        # Simply log the closing price of the series from the reference
        self.log('Close, %.2f' % self.dataclose[0])

        # Check if an order is pending ... if yes, we cannot send a 2nd one
        if self.order:
            return

        if self.current_row is not None:
            signal = self.current_row['signal']
        elif not self._wf_windows:
            # No walk-forward list configured: trade the default windows
            signal = self.signal
        else:
            # A list is configured but no test period has started yet
            return

        # Check if we are in the market
        if not self.position:

            # Not yet ... we MIGHT BUY if the signal just turned positive
            if signal[0] > 0 and signal[-1] <= 0:
                self.log('BUY CREATE, %.2f' % self.dataclose[0])

                # Keep track of the created order to avoid a 2nd order
                self.order = self.buy()

        else:

            if signal[0] <= 0 and signal[-1] > 0:
                self.log('SELL CREATE, %.2f' % self.dataclose[0])

                # Keep track of the created order to avoid a 2nd order
                self.order = self.sell()

    def stop(self):
        """Always print the default windows and the final broker value."""
        self.log(f"Fast: {self.params.fast}; Slow: {self.params.slow}; Val: {self.broker.getvalue()}.", doprint=True)

In [None]:
ticker = 'CL'
folder_name = '5min'
suffix = '5min_20160103_20190405'

# Path of the parquet file holding the source bars
parquet_path = os.path.join(
    '../data/processed/{}/'.format(folder_name),
    '{}_{}.parquet'.format(ticker, suffix),
)

# How each column is aggregated when bars are merged into 4-hour bars
ohlcv_rules = {
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum',
}

# NOTE(review): `base` was deprecated in pandas 1.1 and removed in 2.0;
# on newer pandas the equivalent is presumably offset='18h' — confirm
# against the installed version before changing.
df = (
    pd.read_parquet(parquet_path)
    .resample('4h', label='left', base=18)
    .agg(ohlcv_rules)
    .rename(columns=str.lower)   # lower-case column names
    .dropna()                    # drop 4h slots without any source bar
)

# Quick visual check of the first in-sample window
df.loc['2017-01-01':'2017-04-01', 'close'].plot()

## In-sample Parameters

In [None]:
# Walk-forward schedule: each run trains on WF_IS_MONTHS calendar months and
# is tested on the WF_OOS_MONTHS that follow; consecutive runs are shifted by
# one month.  With the values below this reproduces the 14 runs from
# 2017-01-01 (first in-sample start) to 2018-06-01 (last out-of-sample end).
# NOTE(review): the assignment text above asks for ~365d/90d windows, while
# this schedule uses 3-month/1-month windows — confirm which is intended.
WF_FIRST_IS_START = '2017-01-01'
WF_IS_MONTHS = 3
WF_OOS_MONTHS = 1
WF_N_RUNS = 14

# First day of every month touched by the schedule, as 'YYYY-MM-DD' strings
_wf_month_starts = pd.date_range(
    WF_FIRST_IS_START,
    periods=WF_N_RUNS + WF_IS_MONTHS + WF_OOS_MONTHS,
    freq='MS',
).strftime('%Y-%m-%d').tolist()

periods = [
    {
        'run': run,
        'oos': (_wf_month_starts[run + WF_IS_MONTHS],
                _wf_month_starts[run + WF_IS_MONTHS + WF_OOS_MONTHS]),
        'is': (_wf_month_starts[run],
               _wf_month_starts[run + WF_IS_MONTHS]),
    }
    for run in range(WF_N_RUNS)
]

In [None]:
# A set avoids duplicated (fast, slow) pairs
windowset = set()

# Fixed seed so the candidate windows (and therefore every result derived
# from them) are the same on each Restart & Run All.
random.seed(42)

# Generate a bunch of (fast, slow) SMA windows for comparison.
# This will be used within the analyzer to get the 'best' strategy.
while len(windowset) < 40:
    f = random.randint(1, 10) * 5
    s = random.randint(1, 10) * 10
    if f == s:
        # Equal windows give a signal that is always zero: discard the draw.
        # (The previous code fell through and added the pair anyway.)
        continue
    # The fast window must be the shorter one, so order the pair
    windowset.add((min(f, s), max(f, s)))

# Sorted so the order of the candidates is stable
windows = sorted(windowset)

# For every walk-forward run, try all candidate windows on the in-sample
# slice and keep the most profitable pair; it is applied to the matching
# out-of-sample period later on.
best_oos_params = []
for oos_is in periods:
    start_date, end_date = oos_is['is']
    print('Start/End: {} - {}'.format(start_date, end_date))

    # The previous `cerebro.optreturn = False` only set a plain attribute and
    # never reached the Cerebro parameter, so it has been dropped: the default
    # optimisation return objects still expose `.analyzers`, which is all the
    # result picker needs.
    cerebro = bt.Cerebro()
    cerebro.optstrategy(
        TestStrategy,
        optim=True,
        optim_fs=windows
    )

    cerebro.addanalyzer(AcctStats, _name='custom')

    cerebro.broker.setcash(100000.0)
    cerebro.addsizer(bt.sizers.FixedSize, stake=1000)
    cerebro.broker.setcommission(commission=0.0)

    # NOTE(review): assuming `df` has a DatetimeIndex, label slicing includes
    # the rows dated `end_date`, i.e. the first day of the out-of-sample
    # period is also part of the in-sample slice — confirm this is acceptable.
    data = bt.feeds.PandasData(dataname = df[start_date:end_date])
    cerebro.adddata(data)

    results = cerebro.run(maxcpus=3)
    best_param, best_profit, best_numtrades, best_tradeslist = best_result_from_cerebro_opti_run(results)

    best_oos_params.append({
        'train_param': best_param,
        'train_profit': best_profit,
        'train_numtrades': best_numtrades,
        'train_tradeslist': best_tradeslist,
        'train_period': oos_is['is'],
        'test_period': oos_is['oos'],
    })
    print('')

print('The best OOS parameters are: {}'.format(pformat(best_oos_params)))

## Out-of-sample Parameters

In [None]:
# Re-run every run's winning in-sample parameters on its out-of-sample
# period and store the outcome next to the training figures.
idx = 0               # number of runs processed so far (module-level, as before)
test_netprofit = []
for bestoos in best_oos_params:
    # Out-of-sample (test) time period of this run
    start_date, end_date = bestoos['test_period']
    print('Start/End: {} - {}'.format(start_date, end_date))

    cerebro = bt.Cerebro()
    cerebro.addstrategy(
        TestStrategy,
        optim=True,
        optim_fs=bestoos['train_param'],
    )
    cerebro.addanalyzer(AcctStats, _name='custom')

    # Same account set-up as the in-sample optimisation
    cerebro.broker.setcash(100000.0)
    cerebro.addsizer(bt.sizers.FixedSize, stake=1000)
    cerebro.broker.setcommission(commission=0.0)

    data = bt.feeds.PandasData(dataname = df[start_date:end_date])
    cerebro.adddata(data)

    results = cerebro.run(maxcpus=1)
    r = results[0].analyzers.custom.get_analysis()

    # `bestoos` is the very dict stored in best_oos_params, so this updates
    # the list entry in place
    bestoos.update({
        'test_numtrades': r['num_trades'],
        'test_tradeslist': r['trades'],
        'test_profit': r['profit'],
    })
    test_netprofit.append(r['profit'])
    idx += 1

## Walkforward Efficiency

In [None]:
def days_from_date_tuple(mytuple):
    """Return the whole days between two ``'%Y-%m-%d'`` date strings.

    Args:
        mytuple: sequence whose first two items are the start and end date.

    Returns:
        ``end - start`` in days (negative when the end precedes the start);
        used to annualise profits.
    """
    date_format = '%Y-%m-%d'
    parse = datetime.datetime.strptime
    begin, finish = (parse(mytuple[pos], date_format) for pos in (0, 1))
    return (finish - begin).days

# Walk Forward Efficiency per run: annualised out-of-sample profit as a
# percentage of the annualised in-sample profit, then averaged over the runs.
oos_trades = []
wfe_list = []
# enumerate gives the real run number; the previous code printed a stale
# `idx` left over from the out-of-sample cell, i.e. the same value every time
for run_idx, values in enumerate(best_oos_params):
    oos_trades.extend(values['test_tradeslist'])

    insample_days = days_from_date_tuple(values['train_period'])
    oos_days = days_from_date_tuple(values['test_period'])

    # Scale both profits to a 365-day year so periods of different length
    # can be compared
    insample_annual_profit = 365/insample_days * values['train_profit']
    oos_annual_profit = 365/oos_days * values['test_profit']

    if insample_annual_profit == 0:
        # The ratio is undefined; leave the run out of the average instead
        # of failing with a ZeroDivisionError
        print('Run {} WFE: n/a (in-sample profit is zero)'.format(run_idx))
        continue

    walkforward_efficiency_pct = round(oos_annual_profit * 100 / insample_annual_profit, 1)

    wfe_list.append(walkforward_efficiency_pct)

    print('Run {} WFE:{}'.format(run_idx, walkforward_efficiency_pct))
    print('\t IS_DAYS: {} OOS_DAYS: {} IS_PROFIT: {:0.2f} OOS_PROFIT: {:0.2f}'.format(insample_days, oos_days,
                                                                            insample_annual_profit, oos_annual_profit))

if wfe_list:
    print('Average WFE: {}%'.format(np.asarray(wfe_list).mean()))
else:
    print('Average WFE: n/a (no run had a non-zero in-sample profit)')

In [None]:
# Create a cerebro entity
cerebro = bt.Cerebro()

# Add a strategy
cerebro.addstrategy(WalkforwardStrategy, walkforward=best_oos_params)

# The 4h bars in `df` were already built by the data-loading cell near the
# top of the notebook and have not been modified since, so they are reused
# here instead of reading and resampling the parquet file a second time.

# The walk-forward run spans from the start of the first test period to the
# END of the last one (the previous code took the last period's start, which
# cut almost the whole final out-of-sample window from the equity curve).
start_date = best_oos_params[0]['test_period'][0]
end_date = best_oos_params[-1]['test_period'][1]

print('Start: {} End: {}'.format(start_date, end_date))
data = bt.feeds.PandasData(dataname = df[start_date:end_date])


# Add the Data Feed to Cerebro
cerebro.adddata(data)
cerebro.addanalyzer(AcctStats, _name='custom')

# Same account set-up as the in-sample and out-of-sample runs
cerebro.broker.setcash(100000.0)
cerebro.addsizer(bt.sizers.FixedSize, stake=1000)
cerebro.broker.setcommission(commission=0.0)

# Print out the starting conditions
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())

# Run over everything
results = cerebro.run()

In [None]:
# Report the final account value of the walk-forward run
print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())
r = results[0].analyzers.custom.get_analysis()

# Running total of the per-trade changes in broker value
cumsum = np.cumsum(np.asarray(r['trades']))

avg_wfe = np.round(np.mean(np.asarray(wfe_list)), 1)
print('Average WFE: {}%'.format(avg_wfe))

# Equity curve, one point per closed trade
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(cumsum)
ax.set_title('Walkforward Equity Curve')
ax.set_xlabel('Trades')
ax.set_ylabel('Equity')

cerebro.plot(volume=False, iplot=False)