# WIP: Back-test

This example is still WIP.
* Load Data
* Back Test - the Basic Idea

In [1]:
from grandma_stock_valuation import FileLogger, loadPacakgeData
from grandma_stock_valuation import batchValuation, addCashPortfolio, getCorrelationWeight, allocatePortfolio

# Refer to example_0_FileLogger.ipynb for details of the FileLogger.
logger = FileLogger()
# Short alias; the cells below call it with a label followed by the value to log.
logPrint = logger.logPandas

### Load Data

For this example, I will use the stored package data.

To query data from Yahoo, please refer to *example_1_yahoo_data_loader.ipynb*.

In [2]:
# Load the price data stored with the package.
# d_instrument_data maps ticker -> price DataFrame; d_instrument maps ticker -> description.
d_instrument_data, d_instrument = loadPacakgeData(verbose=2)

# Display the ticker -> description mapping.
d_instrument

VPL data contains 4273 rows, 4273 dates from 2005-03-10 to 2022-02-28.
IVV data contains 5479 rows, 5479 dates from 2000-05-19 to 2022-02-28.
EEMA data contains 2530 rows, 2530 dates from 2012-02-09 to 2022-02-28.
IEV data contains 5431 rows, 5431 dates from 2000-07-28 to 2022-02-28.


{'IVV': 'SP500',
 'VPL': 'Developed Asia-Pacific',
 'IEV': 'Europe',
 'EEMA': 'Emerging Asia'}

In [3]:
# Log which tickers were loaded.
logPrint("Keys of d_instrument_data:", str(d_instrument_data.keys()))

# Log the first rows of one instrument to show the column layout of the price data.
logPrint("IVV (SP500 ETF):", d_instrument_data['IVV'].head())

2022-03-06 17:49:47,147 INFO Keys of d_instrument_data: dict_keys(['VPL', 'IVV', 'EEMA', 'IEV'])
2022-03-06 17:49:47,154 INFO IVV (SP500 ETF): 
        date       open       high        low      close  close_adj   volume
0 2000-05-19  142.65625  142.65625  140.25000  140.68750  94.121216   775500
1 2000-05-22  140.59375  140.59375  136.81250  139.81250  93.535789  1850600
2 2000-05-23  140.21875  140.21875  137.68750  137.68750  92.114151   373900
3 2000-05-24  137.75000  140.06250  136.65625  139.75000  93.494003   400300
4 2000-05-25  140.03125  140.93750  137.87500  138.46875  92.636810    69600



### Back Test - the Basic Idea

WIP
* A back-test is meant to show how your strategy behaves under various scenarios, not to prove that your strategy can beat xxx.
* Most investment products are not built to beat xxx.
* Historical performance cannot represent future performance.


### Back Test - WIP

In [4]:

# --- Back-test settings (read as notebook-level globals by the cells below) ---
# Length of the back-test window, counted back from the back-test end date.
backtest_years = 10
# A ticker whose data starts later than (back-test start - this many years) triggers a warning.
minimum_training_years = 5
# Number of months between two portfolio adjustments.
adjust_freq_months = 1

# Parameter dicts forwarded unchanged to batchValuation() in getAllocation().
# 'train_years' also bounds how much history is kept; 'date_end' (None = latest
# available date) fixes the back-test end date; 'recent_months' is also passed
# to getCorrelationWeight().
init_parameters={'recent_months':0, 'train_years':10, 'date_end':None}
# 'price_col' names the price column used throughout the back-test.
fit_parameters={'price_col':'close_adj', 'log':True, 'n_std':1.5}
valuate_parameters={'min_annual_return':0.01}

# Whether a cash position is added to the portfolio.
with_cash = True
# True: weights come from getCorrelationWeight(); False: equal weights.
with_correlation_weights = True
# Messages are printed through printfunc when verbose > 0.
verbose = 1
printfunc = print

# NOTE(review): imports conventionally live in the first code cell of a notebook.
import numpy as np
import pandas as pd


# Values derived from the parameter dicts above.
maximum_training_years = init_parameters['train_years']
date_end = init_parameters['date_end']
price_col = fit_parameters['price_col']




In [5]:
def _cleanInputData(d_instrument_data):
    # clean price data, get start and end date of each ticker
    d_instrument_prices = {}
    d_instrument_start_dates = {}
    d_instrument_end_dates = {}
    for ticker, df_prices in d_instrument_data.items():
        df_prices = df_prices[df_prices[price_col] > 0][['date', price_col]].reset_index(drop=True)
        df_prices['date'] = pd.to_datetime(df_prices['date'])
        d_instrument_prices[ticker] = df_prices
        d_instrument_start_dates[ticker] = df_prices['date'].min()
        d_instrument_end_dates[ticker] = df_prices['date'].max()

    # dates of back-test periods
    if date_end is None:
        backtest_end_date = max(d_instrument_end_dates.values())
    else:
        backtest_end_date = pd.to_datetime(date_end)

    backtest_start_date = backtest_end_date - pd.DateOffset(years=backtest_years)
    if verbose > 0: printfunc(f"To backtest {backtest_years} years, from {backtest_start_date.date()} to {backtest_end_date.date()}")

    at_least_start_date = backtest_start_date - pd.DateOffset(years=minimum_training_years)
    at_most_start_date = backtest_start_date - pd.DateOffset(years=maximum_training_years)

    for ticker, d in d_instrument_start_dates.items():
        if d > at_least_start_date:
            printfunc(f"{ticker}'s start date {d.date()} is beyond {at_least_start_date.date()} for full back-test.")

    # combine cleaned price data into one dateframe
    df_instrument_prices = pd.DataFrame()
    for ticker, df_prices in d_instrument_prices.items():
        df = df_prices[df_prices['date']>=at_most_start_date].copy()
        df.rename(columns={price_col:'price_'+ticker}, inplace=True)

        if len(df_instrument_prices)==0:
            df_instrument_prices = df.copy()
            continue
        else:
            df_instrument_prices = df_instrument_prices.merge(df, 'outer', 'date')

    df_instrument_prices = df_instrument_prices.sort_values('date').reset_index(drop=True)
    df_instrument_prices.fillna(method='pad', inplace=True)

    index_start = df_instrument_prices.index[df_instrument_prices['date'].tolist().index(backtest_start_date)]
    df_instrument_prices.index = df_instrument_prices['date']
    df_instrument_prices.drop(columns='date', inplace=True)

    return df_instrument_prices, d_instrument_start_dates, d_instrument_end_dates, index_start, backtest_start_date, backtest_end_date


# Build the combined price table and the back-test date boundaries from the loaded data.
df_instrument_prices, d_instrument_start_dates, d_instrument_end_dates, index_start, backtest_start_date, backtest_end_date = _cleanInputData(d_instrument_data)


To backtest 10 years, from 2012-02-28 to 2022-02-28
EEMA's start date 2012-02-09 is beyond 2007-02-28 for full back-test.


In [6]:
def _updateValue(df_portfolio_i, df_instrument_prices, index_i):
    df_new_price = df_instrument_prices.iloc[index_i].reset_index()
    df_new_price.columns = ['ticker', 'new_price']
    df_new_price['ticker'] = df_new_price['ticker'].str.replace('price_','')

    df_portfolio_i = df_portfolio_i.merge(df_new_price, 'left', 'ticker')

    index_null = df_portfolio_i['current_price'].isnull() | df_portfolio_i['current_value'].isnull() | df_portfolio_i['new_price'].isnull()
    df_portfolio_i.loc[~index_null, 'current_value'] = (df_portfolio_i['current_value'] * df_portfolio_i['new_price'] / df_portfolio_i['current_price'])[~index_null] 
    df_portfolio_i['current_price'] = df_portfolio_i['new_price']
    df_portfolio_i.drop(columns='new_price', inplace=True)

    total_value = df_portfolio_i['current_value'].sum()

    return df_portfolio_i, total_value

In [7]:
def _getHistoricalData(df_instrument_prices, index_i, price_col):
    d_instrument_data_i = {}
    for col in df_instrument_prices.columns:
        ticker = col.replace('price_','')
        df = df_instrument_prices[col].iloc[:index_i+1].copy().rename(price_col).dropna().reset_index()

        if len(df)>0:
            d_instrument_data_i[ticker] = df
    
    return d_instrument_data_i


In [8]:
def getAllocation(d_instrument_data_i, total_value, verbose=0):
    """Valuate the instruments and split ``total_value`` across them.

    Reads the notebook-level settings ``init_parameters``, ``fit_parameters``,
    ``valuate_parameters``, ``with_cash``, ``with_correlation_weights``,
    ``price_col`` and ``printfunc``.

    Parameters
    ----------
    d_instrument_data_i : dict
        Ticker -> price history available at the valuation date.
    total_value : float
        Portfolio value to distribute.
    verbose : int, default 0
        Verbosity forwarded to the valuation and weighting helpers.

    Returns
    -------
    pandas.DataFrame
        The metrics table returned by ``batchValuation`` (plus a cash row when
        ``with_cash`` is set), extended with the columns 'weight',
        'portfolio_allocation' and 'current_value'.
    """
    metrics, _ = batchValuation(
        d_instrument_data=d_instrument_data_i,
        init_parameters=init_parameters,
        fit_parameters=fit_parameters,
        valuate_parameters=valuate_parameters,
        draw_figure=False,
        save_result=False,
        metric_file=None,
        figure_folder=None,
        verbose=verbose,
        printfunc=printfunc,
    )

    # Only tickers that received a valuation take part in the weighting.
    has_valuation = metrics['over_value_years'].notnull()
    valid_tickers = metrics.loc[has_valuation, 'ticker'].to_list()

    if with_cash:
        metrics = addCashPortfolio(metrics)

    if not with_correlation_weights:
        # Equal weights over the valid tickers (and cash, when enabled).
        n_inst = len(valid_tickers) + with_cash
        w_inst = 1/n_inst if n_inst>0 else np.nan
        weights = {t: w_inst for t in valid_tickers + ['cash']*with_cash}
    else:
        valid_prices = {k: v for k, v in d_instrument_data_i.items() if k in valid_tickers}
        weights = getCorrelationWeight(
            d_instrument_prices=valid_prices,
            price_col=price_col,
            recent_months=init_parameters['recent_months'],
            train_years=init_parameters['train_years'],
            with_cash=with_cash,
            verbose=verbose,
            printfunc=printfunc,
        )

    # Tickers without a weight entry get NaN.
    metrics['weight'] = metrics['ticker'].apply(lambda t: weights.get(t, np.nan))

    # NOTE(review): with_cash is hard-coded to True here, whereas the
    # notebook-level with_cash flag is used everywhere else -- confirm intent.
    metrics['portfolio_allocation'] = allocatePortfolio(
        metrics['over_value_years'],
        transformation='exponential',
        scale=1,
        with_cash=True,
        weights=metrics['weight'],
    )

    metrics['current_value'] = total_value * metrics['portfolio_allocation']

    return metrics


In [9]:
# Results of the back-test, all keyed by date:
#   d_total_value : total portfolio value on that date
#   d_adjustments : metrics/allocation table for the dates on which the portfolio was adjusted
#   d_portfolio   : portfolio holdings on that date
d_total_value = {}
d_adjustments = {}
d_portfolio = {}

# initialize: allocate a starting value of 1 on the first back-test date
total_value_start = 1
dt = df_instrument_prices.index[index_start]

d_instrument_data_i = _getHistoricalData(df_instrument_prices, index_start, price_col)
df_metrics_i = getAllocation(d_instrument_data_i, total_value=total_value_start)

# Columns of the allocation table that make up a portfolio snapshot.
cols_select = ['ticker','current_price','current_value']
df_portfolio_i = df_metrics_i[cols_select].copy()

d_total_value[dt] = total_value_start
d_adjustments[dt] = df_metrics_i
d_portfolio[dt] = df_portfolio_i

# Date from which the next re-allocation is due.
next_adjust_date = backtest_start_date + pd.DateOffset(months=adjust_freq_months)

# Walk forward one price row at a time over the rest of the price table.
for index_i in range(index_start+1, len(df_instrument_prices)):

    dt = df_instrument_prices.index[index_i]

    # Re-price the current holdings with the prices of this row.
    df_portfolio_i, total_value = _updateValue(df_portfolio_i, df_instrument_prices, index_i)

    # On the first row at or past the due date, re-allocate the current total value
    # using only the history available up to this row.
    if dt >= next_adjust_date:
        next_adjust_date = next_adjust_date + pd.DateOffset(months=adjust_freq_months)

        d_instrument_data_i = _getHistoricalData(df_instrument_prices, index_i, price_col)
        df_metrics_i = getAllocation(d_instrument_data_i, total_value=total_value)
        df_portfolio_i = df_metrics_i[cols_select].copy()

        d_adjustments[dt] = df_metrics_i
        if verbose > 0: printfunc(f"Adjust portfolio on {dt.date()}, total value = {total_value}")
    
    d_total_value[dt] = total_value
    d_portfolio[dt] = df_portfolio_i

# NOTE(review): total_value is only defined once the loop above has run at least one iteration.
if verbose > 0: printfunc(f"final portfolio increased by {total_value/total_value_start-1:.3f} over {backtest_years} years, which is {(total_value/total_value_start)**(1/backtest_years)-1:.4f} annualized growth.")

Adjust portfolio on 2012-03-28, total value = 0.9885520169459421
Adjust portfolio on 2012-04-30, total value = 0.9748133608684513
Adjust portfolio on 2012-05-29, total value = 0.9134643771170381
Adjust portfolio on 2012-06-28, total value = 0.9190387613931946
Adjust portfolio on 2012-07-30, total value = 0.9572586351469142
Adjust portfolio on 2012-08-28, total value = 0.9840512330410469
Adjust portfolio on 2012-09-28, total value = 0.9994583799842225
Adjust portfolio on 2012-10-31, total value = 1.0076669708342296
Adjust portfolio on 2012-11-28, total value = 1.014503959656102
Adjust portfolio on 2012-12-28, total value = 1.0308107807697102
Adjust portfolio on 2013-01-28, total value = 1.057254618795115
Adjust portfolio on 2013-02-28, total value = 1.0554934794188913
Adjust portfolio on 2013-03-28, total value = 1.0469991377653831
Adjust portfolio on 2013-04-29, total value = 1.0610627838823392
Adjust portfolio on 2013-05-28, total value = 1.0634478181855167
Adjust portfolio on 2013-06

In [13]:
# Inspect one portfolio snapshot: the entry at position 1000 (0-based) of the recorded dates.
d_portfolio[list(d_portfolio.keys())[1000]]



Unnamed: 0,ticker,current_price,current_value
0,VPL,43.519577,0.097558
1,IVV,173.078369,0.109027
2,EEMA,41.801369,0.533416
3,IEV,31.408989,0.138709
4,cash,,0.138525
