In [2]:
from datetime import datetime
import os
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm

In [None]:
class BacktestStatistican():
    """Write summary statistics, raw histories and a NAV plot for a backtest.

    The object reads ``portfolio_db.account_history_df`` (columns
    ``'datetime'`` and ``'nav'`` are used) and ``portfolio_db.trades_df``,
    and writes everything into a freshly created, uniquely named folder.

    ``strategy_info`` is a dictionary that is expected to provide:
        'brief descriptor': description used for folder name
        'plot descriptor': description used for plot header
        'universe': brief explanation of strategy's universe
        'signals': brief explanation of strategy's signals
        'trading rule': brief explanation of strategy's trading rule
        'holding period': strategy holding period (string)
        'periods per year': periods per year (number)
        'time lag': description of strategy's lag between signal time and
            its availability for trading
        'output folder name': folder to put all the output subfolders in
    Only 'brief descriptor', 'plot descriptor', 'periods per year' and
    'output folder name' are read by this class itself.
    """

    # Default location of the market data file.
    # The CSV must have a 'date' column formatted as YYYYmmdd plus columns
    # 'mkt_index' (cumulative index of "market" returns) and
    # 'rf_index' (cumulative index of "risk-free" returns).
    # Pass market_data_path to the constructor to override it per instance.
    market_data_path = 'C:\\Dropbox\\Jupyter notebooks\\Backtest Demo\\Data\\mkt_rf_daily.csv'

    def __init__(self, portfolio_db, strategy_info, market_data_path=None):
        """Store the inputs and create the output folder.

        Args:
            portfolio_db: object exposing ``account_history_df`` and
                ``trades_df`` DataFrames.
            strategy_info: dictionary described in the class docstring.
            market_data_path: optional path of the market data CSV; when
                omitted the class-level default is used.
        """
        self.portfolio_db = portfolio_db
        self.strategy_info = strategy_info
        if market_data_path is not None:
            # Shadows the class attribute for this instance only
            self.market_data_path = market_data_path
        # Get the folder this all goes in
        self.output_folder_path = self.make_backtest_path(strategy_info['output folder name'])

    def output_stats(self):
        """Write the histories, the statistics table and the NAV plot to disk.

        Creates ``account_history.csv``, ``trades.csv``,
        ``backtest_stats.csv`` and (via ``nav_plot``) ``nav_plot.png`` in
        ``self.output_folder_path``.

        Raises:
            ValueError: if the account history and the market data file
                share no usable dates for the alpha/beta regression.
        """
        history = self.portfolio_db.account_history_df
        out_dir = self.output_folder_path

        # Output full csvs for account_history and trades
        history.to_csv(out_dir / 'account_history.csv')
        self.portfolio_db.trades_df.to_csv(out_dir / 'trades.csv')

        # Per-period simple returns of the NAV
        nav = history.loc[:, 'nav']
        ret_series = nav.pct_change().dropna().reset_index(drop=True)

        # Number of periods per year for annualization
        N = self.strategy_info['periods per year']

        # Main stats go in this dictionary; insertion order is the row
        # order of the output csv, so keep it stable.
        backtest_stats = dict()

        # means
        arith_mean = ret_series.mean()
        growth_factor = stats.gmean(1 + ret_series)  # per-period gross growth
        backtest_stats['arith mean'] = arith_mean
        backtest_stats['arith mean (ann)'] = arith_mean * N
        backtest_stats['geomean mean'] = growth_factor - 1
        backtest_stats['geomean mean (ann)'] = growth_factor ** N - 1

        # risks
        sigma = ret_series.std()
        backtest_stats['sigma'] = sigma
        backtest_stats['sigma (ann)'] = sigma * (N ** 0.5)
        drawdown = 1 - nav / nav.cummax()  # fractional loss from running peak
        backtest_stats['avg drawdown'] = drawdown.mean()
        backtest_stats['max drawdown'] = drawdown.max()

        # alpha/beta
        results = self._fit_market_model(history)
        backtest_stats['alpha'] = results.params['const']
        backtest_stats['alpha SE'] = results.bse['const']
        backtest_stats['alpha (ann)'] = results.params['const'] * N
        backtest_stats['alpha SE (ann)'] = results.bse['const'] * N
        backtest_stats['beta'] = results.params['mkt_excess']
        backtest_stats['betaSE'] = results.bse['mkt_excess']

        # ratios
        # NOTE(review): the Sharpe ratio below uses raw mean returns (the
        # risk-free return is not subtracted) and the information ratio
        # divides alpha by total sigma rather than residual volatility.
        # Kept as-is so reported numbers do not change -- confirm intent.
        backtest_stats['Sharpe ratio'] = backtest_stats['arith mean'] / backtest_stats['sigma']
        backtest_stats['Sharpe ratio (ann)'] = backtest_stats['arith mean (ann)'] / backtest_stats['sigma (ann)']
        backtest_stats['information ratio'] = backtest_stats['alpha'] / backtest_stats['sigma']
        backtest_stats['information ratio (ann)'] = backtest_stats['alpha (ann)'] / backtest_stats['sigma (ann)']

        # Turn into a dataframe and output as csv
        pd.DataFrame.from_dict(data=backtest_stats, orient='index').to_csv(out_dir / 'backtest_stats.csv')

        self.nav_plot()

    def _fit_market_model(self, history):
        """Regress strategy returns on excess market returns and return the fit.

        Strategy NAV and the market/risk-free indices are joined on date
        before returns are taken, so all three return series cover exactly
        the same periods even if the market file lacks some dates.
        """
        mkt_rf_df = pd.read_csv(self.market_data_path)
        mkt_rf_df['date'] = pd.to_datetime(mkt_rf_df.loc[:, 'date'], format='%Y%m%d')

        strategy_df = history.loc[:, ['datetime', 'nav']].copy()
        strategy_df['datetime'] = pd.to_datetime(strategy_df.loc[:, 'datetime'])

        # Inner join keeps the account-history row order; validate makes a
        # market file with repeated dates fail loudly instead of silently
        # duplicating rows.
        aligned = strategy_df.merge(
            mkt_rf_df.loc[:, ['date', 'mkt_index', 'rf_index']],
            left_on='datetime',
            right_on='date',
            how='inner',
            validate='many_to_one',
        )
        returns = (
            aligned.loc[:, ['nav', 'mkt_index', 'rf_index']]
            .pct_change()
            .dropna()
            .reset_index(drop=True)
        )
        if returns.empty:
            raise ValueError(
                'No overlapping dates between the account history and the market data in {0}'.format(
                    self.market_data_path
                )
            )

        # Set up left- and right-hand sides for the regression.
        # NOTE(review): the left-hand side is the raw strategy return, not
        # the return in excess of the risk-free rate -- confirm intent.
        lhs = returns.loc[:, 'nav']
        # Name the regressor so beta can be looked up by label
        excess_mkt = (returns.loc[:, 'mkt_index'] - returns.loc[:, 'rf_index']).rename('mkt_excess')
        rhs = sm.add_constant(excess_mkt)

        return sm.OLS(lhs, rhs).fit()

    def nav_plot(self):
        """Plot NAV rescaled to start at 1 on a log axis; save as nav_plot.png."""

        plt.style.use('default')  # set plot style to 'default'

        history = self.portfolio_db.account_history_df
        plotdates = history.loc[:, 'datetime']
        nav = history.loc[:, 'nav']

        # Standard set-up for a matplotlib plot. The figure is intentionally
        # left open so a notebook shows it after the cell runs.
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(plotdates, nav / nav.iloc[0])

        # Customize the plot properties
        ax.set_xlabel('Date')
        ax.set_yscale('log')
        # Major ticks at powers of two, unlabeled minor ticks in between
        ax.yaxis.set_major_locator(ticker.LogLocator(base=2))
        ax.yaxis.set_minor_locator(ticker.LogLocator(base=2, subs=[1.25, 1.5, 1.75]))
        ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
        ax.yaxis.set_minor_formatter(ticker.NullFormatter())
        ax.set_ylabel(self.strategy_info['plot descriptor'])
        ax.set_xlim(plotdates.min(), plotdates.max())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # format the tick labels using years

        # Save in our folder
        fig.savefig(self.output_folder_path / 'nav_plot.png', bbox_inches='tight', dpi=300)

    def make_backtest_path(self, output_folder):
        """Create and return a new, unique folder for this backtest's output.

        The folder is ``<output_folder>/<brief descriptor>_YYYYmmddHHMM``.
        If that name is taken, ' (1)', ' (2)', ... is appended, following
        the Windows convention for duplicate names. The resulting path is
        also stored in ``self.output_folder_path``.

        Args:
            output_folder: parent folder; created if it does not exist.

        Returns:
            pathlib.Path of the newly created folder.
        """
        output_path = Path(output_folder)

        # This is the output name we want
        folder_name = '{0}_{1}'.format(self.strategy_info['brief descriptor'], datetime.now().strftime('%Y%m%d%H%M'))
        candidate = output_path / folder_name

        # Try to create first and react to a clash, rather than checking
        # exists() beforehand: this cannot race with another process that
        # creates the same name between the check and the mkdir.
        num_attempts = 0
        while True:
            try:
                candidate.mkdir(parents=True)
                break
            except FileExistsError:
                num_attempts += 1
                candidate = output_path / (folder_name + ' ({0})'.format(num_attempts))

        self.output_folder_path = candidate
        return self.output_folder_path