In [1]:

import datetime
import os.path, sys
import backtrader as bt
import pandas as pd

from sklearn.model_selection import TimeSeriesSplit
from sklearn.utils import indexable
from sklearn.utils.validation import _num_samples
import numpy as np

## Strategy

In [2]:
 
# https://ntguardian.wordpress.com/2017/06/19/walk-forward-analysis-demonstration-backtrader/
class TimeSeriesSplitImproved(TimeSeriesSplit):
    """Time-series cross-validator with optional fixed-length training windows.

    Extends :class:`sklearn.model_selection.TimeSeriesSplit` so that ``split``
    can (a) keep every training window the same length instead of letting it
    grow, and (b) build the training and test windows out of several
    consecutive folds. Within a split the test indices always follow the
    training indices, so shuffling is inappropriate.

    ``split`` only reads ``self.n_splits``; any other option the base class
    may carry is not consulted by this override.

    Parameters
    ----------
    n_splits : int
        Number of splits handed to the base class. The samples are cut into
        ``n_splits + 1`` folds.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> tscv = TimeSeriesSplitImproved(n_splits=3)
    >>> for train_index, test_index in tscv.split(X):
    ...     print("TRAIN:", train_index, "TEST:", test_index)
    TRAIN: [0] TEST: [1]
    TRAIN: [0 1] TEST: [2]
    TRAIN: [0 1 2] TEST: [3]
    >>> for train_index, test_index in tscv.split(X, fixed_length=True):
    ...     print("TRAIN:", train_index, "TEST:", test_index)
    TRAIN: [0] TEST: [1]
    TRAIN: [1] TEST: [2]
    TRAIN: [2] TEST: [3]
    >>> for train_index, test_index in tscv.split(X, fixed_length=True,
    ...                                           train_splits=2):
    ...     print("TRAIN:", train_index, "TEST:", test_index)
    TRAIN: [0 1] TEST: [2]
    TRAIN: [1 2] TEST: [3]

    Notes
    -----
    Let ``split_size = n_samples // (n_splits + 1)`` and
    ``rem = n_samples % (n_splits + 1)``. Counting splits from ``k = 0``:

    * expanding windows (``fixed_length=False``): the training set holds
      ``(train_splits + k) * split_size + rem`` samples;
    * fixed windows (``fixed_length=True``): the training set holds
      ``train_splits * split_size`` samples, plus ``rem`` in the first split
      only;
    * every test set holds ``test_splits * split_size`` samples.

    The number of splits produced is
    ``n_splits + 2 - train_splits - test_splits``.
    """

    def split(self, X, y=None, groups=None, fixed_length=False,
              train_splits=1, test_splits=1):
        """Generate indices to split data into training and test sets.

        This is a generator: argument validation runs when the first split is
        requested, not when ``split`` is called.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data; only its number of samples is used.
        y : array-like, shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like, with shape (n_samples,), optional
            Always ignored, exists for compatibility.
        fixed_length : bool
            Whether training sets should always have a common length
            (rolling window) rather than growing from index 0.
        train_splits : positive int
            Number of folds in the (first) training set.
        test_splits : positive int
            Number of folds in each test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.
        test : ndarray
            The testing set indices for that split.

        Raises
        ------
        ValueError
            If there are more folds than samples, if ``train_splits`` or
            ``test_splits`` is not positive, or if together they need more
            folds than are available.
        """
        X, y, groups = indexable(X, y, groups)
        n_samples = _num_samples(X)
        n_folds = self.n_splits + 1
        train_splits, test_splits = int(train_splits), int(test_splits)
        if n_folds > n_samples:
            raise ValueError(
                ("Cannot have number of folds ={0} greater"
                 " than the number of samples: {1}.").format(n_folds,
                                                             n_samples))
        # Without these checks a non-positive count silently yields empty
        # train/test windows, and an oversized request yields no splits at all.
        if train_splits < 1 or test_splits < 1:
            raise ValueError(
                ("Both train_splits and test_splits must be positive"
                 " integers."))
        if train_splits + test_splits > n_folds:
            raise ValueError(
                ("train_splits ({0}) + test_splits ({1}) cannot exceed the"
                 " number of folds ({2}).").format(train_splits, test_splits,
                                                   n_folds))

        indices = np.arange(n_samples)
        split_size = n_samples // n_folds
        # Samples left over after cutting equal folds go to the front of the data.
        remainder = n_samples % n_folds
        test_size = split_size * test_splits
        train_size = split_size * train_splits
        # The last test window ends exactly at n_samples.
        test_starts = range(train_size + remainder,
                            n_samples - (test_size - split_size),
                            split_size)
        for i, test_start in enumerate(test_starts):
            if fixed_length:
                # Only the first window absorbs the leftover samples.
                rem = remainder if i == 0 else 0
                train_start = test_start - train_size - rem
            else:
                train_start = 0
            yield (indices[train_start:test_start],
                   indices[test_start:test_start + test_size])
                
# Two-level results table, seeded with a single zero entry and echoed once.
# No live code in this section writes to it after this point.
llNetValue = {
    0: {0: 0},
}
print(llNetValue)
# Create a Strategy
class TestStrategy(bt.Strategy):
    """Long-only moving-average strategy on the first data feed.

    Entry: when flat and ``close > SMA``, ``close > EMA`` and ``SMA > EMA``.
    Exit: when in a position and all three inequalities are reversed.
    Only one order may be pending at a time.
    """
    params = (
        ('maperiod', 15),            # SMA period ("fast")
        ('printlog', False),         # print every log line when True
        ('emaPeriod', 20),           # EMA period ("slow")
        ("optimize", False),         # not read anywhere in this class
        ("optimize_fs", (15, 20)),   # not read anywhere in this class
    )
    # Count of closed trades; the first increment shadows this on the instance.
    nTradeCount = 0

    def log(self, txt, dt=None, doprint=False):
        '''Print ``txt`` with a date prefix when ``printlog`` or ``doprint`` is set.'''
        if not (self.params.printlog or doprint):
            return
        # Fall back to the current bar's date when no explicit date is given.
        stamp = dt or self.datas[0].datetime.date(0)
        print('%s, %s' % (stamp.isoformat(), txt))

    def __init__(self):
        feed = self.datas[0]
        # Shortcut to the close line of the first data feed.
        self.dataclose = feed.close

        # Pending-order handle plus the price/commission of the last buy.
        self.order = None
        self.buyprice = None
        self.buycomm = None

        # Indicators: the SMA is bound to the feed explicitly, the EMA is
        # created without a data argument.
        self.sma = bt.indicators.SimpleMovingAverage(
            feed, period=self.params.maperiod)
        self.ema1 = bt.indicators.ExponentialMovingAverage(
            period=self.params.emaPeriod)

    def notify_order(self, order):
        '''Log executions and clear the pending-order handle once an order settles.'''
        status = order.status
        if status == order.Submitted or status == order.Accepted:
            # Still on its way to / sitting at the broker: nothing to do yet.
            return

        if status == order.Completed:
            executed = order.executed
            fill = (executed.price, executed.value, executed.comm)
            if order.isbuy():
                self.log('BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % fill)
                self.buyprice = executed.price
                self.buycomm = executed.comm
            else:
                self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % fill)
            # Remember the bar count at which the fill happened.
            self.bar_executed = len(self)
        elif status in (order.Canceled, order.Margin, order.Rejected):
            self.log('Order Canceled/Margin/Rejected')

        # Any status other than Submitted/Accepted frees the slot for a new order.
        self.order = None

    def notify_trade(self, trade):
        '''Log gross/net profit of a closed trade and count it.'''
        if trade.isclosed:
            self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' %
                     (trade.pnl, trade.pnlcomm))
            self.nTradeCount += 1

    def next(self):
        '''Evaluate the entry/exit rules on the current bar.'''
        close = self.dataclose[0]
        self.log('Close, %.2f' % close)

        # A second order is never sent while one is pending.
        if self.order:
            return

        sma_now, ema_now = self.sma[0], self.ema1[0]
        if not self.position:
            # Flat: buy only when close is above both averages and SMA leads EMA.
            if close > sma_now and close > ema_now and sma_now > ema_now:
                self.log('BUY CREATE, %.2f' % close)
                self.order = self.buy()
        elif close < sma_now and close < ema_now and sma_now < ema_now:
            # In the market: sell on the mirrored condition.
            self.log('SELL CREATE, %.2f' % close)
            self.order = self.sell()

    def stop(self):
        '''Always print the parameter pair, ending value and closed-trade count.'''
        summary = ('(SMA Period %2d, EMA Period %2d) Ending Value %.2f, tradeCount %d' %
                   (self.params.maperiod, self.params.emaPeriod,
                    self.broker.getvalue(), self.nTradeCount))
        self.log(summary, doprint=True)
        
class FixedSlippageAndCommisionScheme(bt.CommInfoBase):
    '''Flat-fee scheme standing in for slippage and commission together.'''
    params = (
        ('commission', 10),                        # flat amount returned by _getcommission
        ('stocklike', True),
        ('commtype', bt.CommInfoBase.COMM_FIXED),
    )

    def _getcommission(self, size, price, pseudoexec):
        '''Return the ``commission`` parameter unchanged; size, price and pseudoexec are ignored.'''
        flat_fee = self.p.commission
        return flat_fee

class AcctStats(bt.Analyzer):
    """Analyzer reporting the broker value at construction and at stop."""

    def __init__(self):
        broker = self.strategy.broker
        self.startValue = broker.get_value()
        # Filled in by stop(); get_analysis() needs it to be set.
        self.endValue = None

    def stop(self):
        """Record the broker value at the end of the run."""
        self.endValue = self.strategy.broker.get_value()

    def get_analysis(self):
        """Return start/end values, their difference and their ratio as a dict."""
        start, end = self.startValue, self.endValue
        return {
            "startValue": start,
            "endValue": end,
            "valueGrowth": end - start,
            "ratio": end / start,
        }
    
# renc, 
# parameter: data sample period   
# parameter: simple moving average period, 
     

# After finding the best-performing parameters in-sample,
# apply them to the out-of-sample period.
def outOfSample(inMaPeriod, inEmaPeriod, inDfTest, outList, isPlot=False):
    """Backtest ``TestStrategy`` once on ``inDfTest`` with fixed parameters.

    Parameters
    ----------
    inMaPeriod : value passed to the strategy as ``maperiod``.
    inEmaPeriod : value passed to the strategy as ``emaPeriod``.
    inDfTest : DataFrame wrapped in a ``bt.feeds.PandasData`` feed.
    outList : object with an ``append`` method; receives the final portfolio value.
    isPlot : when true, ``cerebro.plot`` is called after the run.

    The run uses 100000.0 starting cash, a fixed stake of 1000 and
    ``FixedSlippageAndCommisionScheme``. Nothing is returned.
    """
    print("-- Out-of-Sample testing -- ")

    # Engine, strategy and data feed.
    cerebro = bt.Cerebro()
    cerebro.addstrategy(TestStrategy, maperiod=inMaPeriod, emaPeriod=inEmaPeriod)
    cerebro.adddata(bt.feeds.PandasData(dataname=inDfTest))

    # Broker set-up: cash, position size, and the flat fee that stands in
    # for both slippage and commission.
    cerebro.broker.setcash(100000.0)
    cerebro.addsizer(bt.sizers.FixedSize, stake=1000)
    cerebro.broker.addcommissioninfo(FixedSlippageAndCommisionScheme())

    print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
    cerebro.run()
    final_value = cerebro.broker.getvalue()
    print('Final Portfolio Value: %.2f' % final_value)
    outList.append(final_value)

    if isPlot:
        # iplot=False: do not use the inline notebook plot.
        cerebro.plot(volume=False, iplot=False)
    
    
# Load data
# CL: crude oil; GC: gold; ES: S&P 500. All futures.
fname_symbol = "CL"  # alternatives: "GC", "ES"
folder_name = '5min'
suffix = '5min_20160103_20190405'

# Working from a DataFrame is more flexible than a file-based feed.
df = pd.read_parquet(os.path.join('../data/processed/{}/'.format(folder_name),
                                  '{}_{}.parquet'.format(fname_symbol, suffix)))
# Aggregate the 5-minute bars into hourly OHLCV bars labelled by bin start.
# The former `base=18` argument was dropped: for a 1-hour rule it never moved
# the bin edges, and the keyword no longer exists in pandas 2.x.
df = (df.resample('1h', label='left').agg(
        {'Open': 'first', 'High': 'max', 'Low': 'min',
         'Close': 'last', 'Volume': 'sum'}))
df.columns = [col_name.lower() for col_name in df.columns]
# Hours without any trades produce NaN prices; drop them.
df = df.dropna()
# Partial-string date slices include the whole end day, so the training
# window stops at 2017-12-31 to avoid sharing 2018-01-01 with the test window.
dfTrain = df.loc['2016-01-01':'2017-12-31']
dfTest = df.loc['2018-01-01':'2018-06-01']
      
def inSampleOptimization():
    """Walk-forward optimisation of ``TestStrategy`` over ``dfTrain``.

    ``dfTrain`` is cut into rolling train/test windows with
    ``TimeSeriesSplitImproved(10)`` (fixed length, two folds per training
    window). For every window:

    1. all combinations of ``maperiod`` in 1..20 and ``emaPeriod`` in 11..30
       are backtested on the training rows;
    2. the combination with the highest PnL is run on the window's test rows
       through ``outOfSample``;
    3. the pair and its out-of-sample ending value are recorded.

    Finally the recorded pair with the highest out-of-sample ending value is
    run (with a plot) on the module-level ``dfTest``.

    Reads the module-level ``dfTrain`` and ``dfTest``; prints progress and
    results; returns nothing.
    """
    print("In-Sample training")

    tscv = TimeSeriesSplitImproved(10)
    split = tscv.split(dfTrain, fixed_length=True, train_splits=2)

    startCash = 100000.0
    # One [maperiod, emaPeriod, out-of-sample ending value] entry per window.
    outResults = list()

    for iCountTrain, (train, test) in enumerate(split):
        print("-- Training : " + str(iCountTrain) + " --\n")

        # Create a cerebro entity that sweeps the parameter grid.
        cerebro = bt.Cerebro()
        cerebro.optstrategy(
            TestStrategy
            , maperiod=range(1, 21)
            , emaPeriod=range(11, 31)
        )

        # The split positions were generated from dfTrain, so they must be
        # applied to dfTrain (not to the full frame) to select the right rows.
        dfTrainWF = dfTrain.iloc[train]
        cerebro.adddata(bt.feeds.PandasData(dataname=dfTrainWF))

        # Set our desired cash start and a fixed stake per order.
        cerebro.broker.setcash(startCash)
        cerebro.addsizer(bt.sizers.FixedSize, stake=1000)

        # We're trading futures, so the "commission" stands in for both
        # slippage and commissions.
        slippage_and_comms = FixedSlippageAndCommisionScheme()
        cerebro.broker.addcommissioninfo(slippage_and_comms)

        print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
        # optreturn=False is used so each result exposes .broker and .params
        # for the ranking below.
        results = cerebro.run(optreturn=False)

        # Rank every parameter combination by its in-sample PnL.
        result_list = []
        for run in results:
            for strategy in run:
                value = round(strategy.broker.getvalue(), 2)
                PnL = round(value - startCash, 2)
                result_list.append([strategy.params.maperiod,
                                    strategy.params.emaPeriod, PnL])
        by_PnL = sorted(result_list, key=lambda x: x[2], reverse=True)
        print(by_PnL)

        # Test the in-sample winner on this window's held-out rows.
        bestMaPeriod, bestEmaPeriod = by_PnL[0][0], by_PnL[0][1]
        dfTestWF = dfTrain.iloc[test]
        outValue = list()
        outOfSample(bestMaPeriod, bestEmaPeriod, dfTestWF, outValue)
        outResults.append([bestMaPeriod, bestEmaPeriod, outValue[0]])

    print("-- \n")
    print("-- print the results --\n")
    outResults_byPnL = sorted(outResults, key=lambda x: x[2], reverse=True)
    print(outResults_byPnL)
    print("-- run the test with best para -- \n")
    # outOfSample appends to its fourth argument, so it must be a list
    # (a tuple has no append and would raise AttributeError).
    outOfSample(outResults_byPnL[0][0], outResults_byPnL[0][1], dfTest, list(), True)

    print("-- end --\n")

"""
print("__name__ =" + __name__ )
if __name__ == "__main__":
    print("Hello world")     
    print("sys.argv: ", sys.argv) 
    print(os.path.abspath(sys.argv[0]))
    inSampleOptimization() 
"""         


{0: {0: 0}}


'\nprint("__name__ =" + __name__ )\nif __name__ == "__main__":\n   print("Hello world")     \n   print("sys.argv: ", sys.argv) \n   print(os.path.abspath(sys.argv[0]))\n   inSampleOptimization() \n'

## Strategy Plot

In [None]:
# Plot the out-of-sample run for SMA period 13 / EMA period 26 on dfTest.
outOfSample(13, 26, dfTest, [], isPlot=True)

-- Out-of-Sample testing -- 
Starting Portfolio Value: 100000.00
2018-06-01, (SMA Period 13, EMA Period 26) Ending Value 104920.00, tradeCount 31
Final Portfolio Value: 104920.00
