In [1]:
import numpy as np
import pandas as pd

from model.grandma_valuation import GrandmaRegression
from model.portfolio_allocator import allocatePortfolio


In [2]:
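"""
First month of available price history for each instrument:

SP500 (IVV)                  - 2000 May
Europe (IEV)                 - 2000 Jul
Developed Asia-Pacific (VPL) - 2005 Mar

Greater China (3073.HK)      - 2010 Sep
SE Asia (ASEA)               - 2011 Feb
"""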

# Region label -> ticker of the instrument that represents it.
# The ticker doubles as the stem of the data file that is loaded for the region.
d_instrument = {
    'SP500': 'IVV',
    'Europe': 'IEV',
    'Greater China': '3073.HK',
    'Developed Asia-Pacific': 'VPL',
    'SE Asia': 'ASEA',
}

# Input files are read from <file_folder>/<ticker><file_suffix>.
file_folder = '__data__'
file_suffix = '_EOD.csv.gz'
price_col = 'close_adj'  # price column used for filtering, valuation and plotting

# Backtest window and training-history bounds (all in years).
end_date = None  # e.g. '2022-01-31'; None -> use the latest date found in the loaded data
backtest_years = 5
minimum_training_years = 5   # instruments with less history than this are skipped
maximum_training_years = 10  # passed to the valuation model as its training window


In [3]:
from os import path

d_data = {}
backtest_end_date = None
for name, ticker in d_instrument.items():
    # Read the instrument file, parse the dates, keep only rows with a
    # positive price and order them chronologically.
    df = (
        pd.read_csv(path.join(file_folder, ticker+file_suffix))
        .assign(date=lambda raw: pd.to_datetime(raw['date']))
        .loc[lambda raw: raw[price_col]>0]
        .sort_values('date')
        .reset_index(drop=True)
    )
    print(f"Loaded {name}({ticker}) data contains {len(df)} rows over {df['date'].nunique()} dates from {df['date'].min().date()} to {df['date'].max().date()}.")
    d_data[name] = df

    # The backtest ends on the configured end_date, or - when none is
    # configured - on the latest date seen across all instruments so far.
    if end_date is not None:
        backtest_end_date = pd.to_datetime(end_date)
    elif backtest_end_date is None:
        backtest_end_date = df['date'].max()
    else:
        backtest_end_date = max(backtest_end_date, df['date'].max())

# The backtest covers the last `backtest_years` years before the end date.
backtest_start_date = backtest_end_date - pd.DateOffset(years=backtest_years)
print(f"To backtest {backtest_years} years, from {backtest_start_date.date()} to {backtest_end_date.date()}")


Loaded SP500(IVV) data contains 5469 rows over 5469 dates from 2000-05-19 to 2022-02-11.
Loaded Europe(IEV) data contains 5421 rows over 5421 dates from 2000-07-28 to 2022-02-11.
Loaded Greater China(3073.HK) data contains 2812 rows over 2812 dates from 2010-09-15 to 2022-02-11.
Loaded Developed Asia-Pacific(VPL) data contains 4263 rows over 4263 dates from 2005-03-10 to 2022-02-11.
Loaded SE Asia(ASEA) data contains 2766 rows over 2766 dates from 2011-02-17 to 2022-02-11.
To backtest 5 years, from 2017-02-11 to 2022-02-11


In [4]:

def getAllocation(total_dollar, d_end_date, d_data=d_data, price_col=price_col, minimum_training_years=minimum_training_years, maximum_training_year=maximum_training_years):
    """Value every instrument as of ``d_end_date`` and split ``total_dollar`` across them.

    Parameters
    ----------
    total_dollar : number
        Total portfolio value to distribute.
    d_end_date : pandas.Timestamp
        Valuation date; only rows with ``date <= d_end_date`` are used.
    d_data : dict
        Maps instrument name -> DataFrame with a ``date`` column and the
        ``price_col`` column.
    price_col : str
        Name of the price column handed to the valuation model.
    minimum_training_years : int
        Instruments whose first date plus this many years is later than
        ``d_end_date`` are skipped (a message is printed).
    maximum_training_year : int
        Training window passed to ``GrandmaRegression`` as ``train_years``.

    Returns
    -------
    pandas.DataFrame
        One row per valued instrument with columns ``name``,
        ``over_value_years``, ``currenct_price``, ``portfolio_allocation``
        and ``dollar`` (= ``total_dollar * portfolio_allocation``).

    Raises
    ------
    ValueError
        If no instrument has enough history as of ``d_end_date``.

    Notes
    -----
    Assumes ``GrandmaRegression.evaluateValuation()`` returns a mapping that
    contains at least ``over_value_years`` and ``currenct_price`` (the key is
    spelled that way by the model) - confirm against the model module.
    """
    l_metrics = []
    for name, data in d_data.items():

        if data['date'].min() + pd.DateOffset(years=minimum_training_years) > d_end_date:
            print(f"{name} had no sufficient training data asof {d_end_date.date()}.")
            # Skip the instrument instead of appending an empty placeholder
            # frame: concatenating empty frames is deprecated in pandas and
            # contributes no rows anyway.
            continue

        df = data[data['date']<=d_end_date].copy()
        # Use the caller-supplied window; the module-level
        # `maximum_training_years` was previously read here, which silently
        # ignored the `maximum_training_year` argument.
        grandma = GrandmaRegression(recent_months=0, train_years=maximum_training_year, verbose=0)
        grandma.fitTransform(df, price_col=price_col)
        d_metrics = grandma.evaluateValuation()
        df_metrics = pd.Series(d_metrics).to_frame().T
        df_metrics['name'] = name

        l_metrics.append(df_metrics)

    if not l_metrics:
        raise ValueError(f"No instrument has sufficient training data asof {d_end_date.date()}.")

    # Consolidate valuation metrics
    df_metrics = pd.concat(l_metrics).reset_index(drop=True)

    # Put the instrument name first, keep the remaining metric columns in order.
    cols_first = ['name']
    df_metrics = df_metrics[cols_first + list(df_metrics.columns.drop(cols_first))]

    # Allocate portfolio; instruments without a valuation get a zero weight.
    df_metrics['portfolio_allocation'] = allocatePortfolio(df_metrics['over_value_years'], transformation='exponential', scale=None).fillna(0)

    metrics_cols = ['name','over_value_years','currenct_price','portfolio_allocation']
    df_portfolio = df_metrics[metrics_cols].copy()
    df_portfolio['dollar'] = total_dollar * df_portfolio['portfolio_allocation']

    return df_portfolio


# Seed the backtest: allocate the starting 100 dollars using only the data
# available up to the first backtest date, then display the result.
df_portfolio = getAllocation(
    total_dollar=100,
    d_end_date=backtest_start_date,
)
df_portfolio

Unnamed: 0,name,over_value_years,currenct_price,portfolio_allocation,dollar
0,SP500,0.23683,212.216248,0.348088,34.80875
1,Europe,-0.029781,34.858818,0.533272,53.327247
2,Greater China,1.176957,32.150002,0.077343,7.73431
3,Developed Asia-Pacific,1.569121,54.173916,0.041297,4.129692
4,SE Asia,,11.937038,0.0,0.0


In [5]:
def updatePrice(df_portfolio, d, d_data=d_data, price_col=price_col):
    """Mark the portfolio to market as of date ``d``.

    Each holding's ``dollar`` value is scaled by the ratio of its latest
    price at or before ``d`` to its stored ``currenct_price``; the
    allocation weights are then recomputed from the new dollar values and
    ``currenct_price`` is moved forward to the price that was used.

    Parameters
    ----------
    df_portfolio : pandas.DataFrame
        Holdings with columns ``name``, ``dollar``, ``currenct_price`` and
        ``portfolio_allocation``.
    d : pandas.Timestamp
        Valuation date.
    d_data : dict
        Maps instrument name -> price DataFrame with ``date`` and
        ``price_col`` columns. The last row at or before ``d`` is taken as
        the latest quote, so each frame is expected to be sorted by date
        ascending (the loading cell sorts them).
    price_col : str
        Name of the price column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns as ``df_portfolio``; the input
        frame is not modified.
    """
    # Latest quote at or before `d` for every instrument that has one.
    # Instruments with no quote yet are skipped rather than raising
    # IndexError on an empty selection.
    l_current_price = []
    for name, data in d_data.items():
        prices = data.loc[data['date']<=d, price_col]
        if prices.empty:
            continue
        l_current_price.append({'name': name, 'price': prices.iloc[-1]})
    df_current_price = pd.DataFrame(l_current_price, columns=['name', 'price'])

    df_portfolio = df_portfolio.merge(df_current_price, 'left', 'name')
    # A holding without a quote keeps its previous price, i.e. its dollar
    # value is carried forward unchanged.
    df_portfolio['price'] = df_portfolio['price'].fillna(df_portfolio['currenct_price'])
    df_portfolio['dollar'] = df_portfolio['dollar'] * df_portfolio['price'] / df_portfolio['currenct_price']
    df_portfolio['portfolio_allocation'] = df_portfolio['dollar'] / df_portfolio['dollar'].sum()
    df_portfolio['currenct_price'] = df_portfolio['price']

    return df_portfolio.drop(columns=['price'])


# Daily backtest: mark the portfolio to market every calendar day and
# re-allocate it once a month.
d_portfolio = {}      # date -> portfolio snapshot for that day
d_total_dollar = {}   # date -> total portfolio value before any rebalance on that day
rebalance_date = backtest_start_date + pd.DateOffset(months=1)

# Re-seed the portfolio here so this cell can be re-run on its own. The loop
# overwrites `df_portfolio`, so a second run would otherwise start from the
# final portfolio of the previous run and report growth against the wrong base.
initial_dollar = 100
df_portfolio = getAllocation(total_dollar=initial_dollar, d_end_date=backtest_start_date)

for d in pd.date_range(backtest_start_date, backtest_end_date):
    df_portfolio = updatePrice(df_portfolio, d)
    total_dollar = df_portfolio['dollar'].sum()

    if d == rebalance_date:
        # Redistribute the current total according to a fresh valuation as of `d`.
        df_portfolio = getAllocation(total_dollar=total_dollar, d_end_date=d)
        rebalance_date = rebalance_date + pd.DateOffset(months=1)
        print(f"Rebalance on {d.date()}, total dollar = {total_dollar}")

    d_portfolio[d] = df_portfolio
    d_total_dollar[d] = total_dollar

print(f"final portfolio increased by {total_dollar/initial_dollar-1:.3f} over {backtest_years} years, which is {(total_dollar/initial_dollar)**(1/backtest_years)-1:.4f} annualized growth.")


Rebalance on 2017-03-11, total dollar = 102.2837529040654
Rebalance on 2017-04-11, total dollar = 103.71881595705739
Rebalance on 2017-05-11, total dollar = 107.9012464331846
Rebalance on 2017-06-11, total dollar = 109.88156933881095
Rebalance on 2017-07-11, total dollar = 109.86790782118199
Rebalance on 2017-08-11, total dollar = 110.83513516711874
Rebalance on 2017-09-11, total dollar = 113.38298098528803
Rebalance on 2017-10-11, total dollar = 116.49336985480657
Rebalance on 2017-11-11, total dollar = 117.80954281096184
Rebalance on 2017-12-11, total dollar = 121.14270021320124
Rebalance on 2018-01-11, total dollar = 126.3486330351224
Rebalance on 2018-02-11, total dollar = 119.74681662602924
Rebalance on 2018-03-11, total dollar = 126.26335790148163
Rebalance on 2018-04-11, total dollar = 122.15544885051521
Rebalance on 2018-05-11, total dollar = 125.89757172767551
Rebalance on 2018-06-11, total dollar = 127.5813901765822
Rebalance on 2018-07-11, total dollar = 125.64184916411946
R

In [7]:
import plotly.graph_objects as go

# Line colour used for each instrument's trace.
color_map = {
    'SP500': 'palegreen',
    'Europe': 'cyan',
    'Greater China': 'lightsalmon',
    'Developed Asia-Pacific': 'grey',
    'SE Asia': 'red',
}

fig = go.Figure()

# One thin line per instrument: its price inside the backtest window,
# rebased so the first price in the window equals 1.
for name, data in d_data.items():
    in_window = (backtest_start_date<=data['date']) & (data['date']<=backtest_end_date)
    df = data.loc[in_window].copy()
    growth = df[price_col] / df[price_col].iloc[0]
    trace_style = dict(color=color_map[name], width=0.5)

    fig.add_trace(go.Scatter(x=df['date'], y=growth, name=name, line=trace_style))

# Overlay the backtested portfolio, rebased by its 100-dollar starting value.
portfolio_dates = list(d_total_dollar)
portfolio_growth = pd.Series(list(d_total_dollar.values())) / 100
fig.add_trace(go.Scatter(x=portfolio_dates, y=portfolio_growth, name='Grandma', line=dict(color='white', width=1)))

fig.update_layout(template='plotly_dark', xaxis_title='date', yaxis_title='growth')




In [19]:
# Stack every daily portfolio snapshot into one long frame.
df_portfolio_daily = pd.concat([*d_portfolio.values()]).reset_index(drop=True)

# Average dollar holding per instrument over all backtest days, plus each
# instrument's share of the summed averages.
df_portfolio_avg = (
    df_portfolio_daily
    .groupby('name')['dollar']
    .mean()
    .reset_index()
    .assign(pct=lambda avg: avg['dollar'] / avg['dollar'].sum())
)

df_portfolio_avg

Unnamed: 0,name,dollar,pct
0,Developed Asia-Pacific,19.79592,0.141596
1,Europe,22.628378,0.161856
2,Greater China,15.680917,0.112162
3,SE Asia,25.018766,0.178954
4,SP500,56.681653,0.405432
