In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
from duneanalytics import DuneAnalytics
import numpy as np
import pandas as pd
import pandas_datareader.data as reader
import datetime as dt
import statsmodels.api as sm

2022-08-11 13:08:28,581 : INFO : _init_num_threads : NumExpr defaulting to 8 threads.


### Define Helper Functions

In [2]:
def calc_beta(df_ret, token='BTC', benchmark='SP500'):
    """Estimate the beta of `token` against `benchmark` with an OLS fit.

    The `token` column of `df_ret` is regressed on the `benchmark` column
    plus an intercept, and the fitted slope is returned.

    Parameters
    ----------
    df_ret : pandas.DataFrame
        Frame holding one return series per column.
    token : str
        Name of the column used as the dependent variable.
    benchmark : str
        Name of the column used as the explanatory variable.

    Returns
    -------
    The coefficient fitted on `benchmark`.
    """
    # include an intercept so the slope is not forced through the origin
    regressors = sm.add_constant(df_ret[benchmark])
    fitted = sm.OLS(df_ret[token], regressors).fit()
    return fitted.params[benchmark]

In [3]:
def annualize_tot_ret(tot_ret, dur_years):
    """Convert a cumulative return into its annualized (geometric) rate.

    Parameters
    ----------
    tot_ret : total return over the whole period, as a decimal
        (0.21 means +21%).
    dur_years : length of the period in years.

    Returns
    -------
    The constant yearly rate that compounds to `tot_ret` over `dur_years`.
    """
    growth_factor = 1 + tot_ret
    per_year_exponent = 1 / dur_years
    return growth_factor ** per_year_exponent - 1

In [4]:
def extract_frame_from_dune_data(dune_data, date_col='day'):
    """Turn a Dune query-result payload into a date-indexed DataFrame.

    Parameters
    ----------
    dune_data : dict
        Payload whose records live under
        ``dune_data['data']['get_result_by_result_id']``; the fields of
        interest are nested under each record's ``data`` key.
    date_col : str
        Name of the field holding the timestamp string. Anything from the
        first 'T' onwards is discarded before parsing.

    Returns
    -------
    pandas.DataFrame
        The payload fields (without the ``data.`` prefix), indexed by
        ``date``, with the final row removed.
    """
    records = dune_data['data']['get_result_by_result_id']
    df = pd.json_normalize(records, record_prefix='')
    # keep only the flattened payload fields; .copy() so the assignments
    # below act on an independent frame rather than on a slice
    df = df.loc[:, df.columns.str.startswith('data')].copy()
    # strip only the LEADING 'data.': a plain substring replace would also
    # eat 'data.' inside nested names (e.g. 'data.metadata.value')
    df.columns = df.columns.str.replace(r'^data\.', '', regex=True)
    df['date'] = pd.to_datetime(df[date_col].str.replace('T.*', '', regex=True))
    if date_col != 'date':
        df = df.drop(date_col, axis=1)
    df = df.set_index('date')
    # drop the last row because it may not always be a full day
    # NOTE(review): assumes rows arrive in ascending date order; confirm
    return df.iloc[:-1, :]

In [5]:
# Dune Analytics login credentials are read from the environment
MY_USERNAME = os.environ.get('DUNE_USERNAME')
MY_PASSWORD = os.environ.get('DUNE_PASSWORD')

# open an authenticated session
dune = DuneAnalytics(MY_USERNAME, MY_PASSWORD)
dune.login()
dune.fetch_auth_token()

# query daily prices for GLP and TriCrypto
glp_result_id = dune.query_result_id(query_id=1069389)
glp_arbi_prices = dune.query_result(glp_result_id)
tri_result_id = dune.query_result_id(query_id=1145739)
tricrypto_prices = dune.query_result(tri_result_id)

# one date-indexed price frame per asset, with the price column named after it
df_glp_prices = extract_frame_from_dune_data(glp_arbi_prices, 'date').rename(
    columns={'price': 'GLP'})
df_tri_prices = extract_frame_from_dune_data(tricrypto_prices, 'date').rename(
    columns={'price': 'TriCrypto'})

# TriCrypto prices start on 2021-06-09 and GLP prices on 2021-08-31.
# Trim TriCrypto to begin on GLP's first date so that the monthly returns
# of both are calculated over the same months.
glp_first_day = df_glp_prices.index[0]
df_tri_prices = df_tri_prices.loc[glp_first_day:]

## Get Price Data from Yahoo

Assets: S&P 500, real estate (REITs), inflation-linked bonds (TIPS), nominal bonds, gold, broad commodities, BTC, and ETH.

To save download time, we request prices for all assets starting from the first available date of the asset with the shortest history.

In [None]:
# GLP price first became available on 2021-08-31.
# When running locally, the yahoo price reader returns prices from the day
# before `start` (inclusive). On streamlit cloud it excludes the day before
# `start`, which should be the correct behavior.
# I guess it has to do with my timezone and local time?
start = dt.date(2021, 9, 1)

# the download window ends on the first day of the current month (UTC)
today = dt.datetime.now(tz=dt.timezone.utc)
end = today.date().replace(day=1)

# Yahoo ticker -> display name used as the column label
tickers_names = {
    '^GSPC': 'SP500',
    'VNQ': 'Real Estate',
    'TIP': 'Inflation-Linked Bonds',
    'BND': 'Nominal Bonds',
    'GLD': 'Gold',
    '^SPGSCI': 'Broad Commodities',
    'BTC-USD': 'BTC',
    'ETH-USD': 'ETH',
}
tickers = list(tickers_names)

# adjusted closes only, relabelled with the display names
yahoo_data = reader.get_data_yahoo(tickers, start, end)
df_prices = yahoo_data['Adj Close'].rename(columns=tickers_names)
df_prices.columns.name = None

In [None]:
# Peek at the earliest downloaded rows to check where the price history starts.
df_prices.head(2)

In [None]:
# Peek at the latest downloaded rows to check where the price history ends.
df_prices.tail(2)

In [None]:
# `end` is the first day of the current month; a row dated on/after it would
# produce a fake current-month return, so keep only rows strictly before it.
# Filtering by date instead of dropping the last row by position keeps this
# cell idempotent (re-running it removes nothing more) and does not discard a
# valid month-end price when the download already excluded `end`.
# NOTE(review): assumes df_prices has a timezone-naive DatetimeIndex; confirm.
df_prices = df_prices.loc[df_prices.index < pd.Timestamp(end)]

In [None]:
# Re-check the last rows after the trailing row was removed.
df_prices.tail(2)

In [None]:
# Download the risk-free rates. They come already multiplied by 100, so
# divide by 100 to get decimals.
# This reader behaves correctly: data starts on `start`, not the day before.
ff_tables = reader.DataReader('F-F_Research_Data_Factors', 'famafrench', start, end)
rfs = ff_tables[0]['RF'] / 100
rfs.head()

## Calculate Monthly Excess Returns

In [None]:
def _month_end_returns(prices):
    """Percentage change between consecutive months' last resampled prices."""
    return prices.resample('M').last().pct_change()


# monthly returns for the Yahoo assets, GLP and TriCrypto, combined in one frame
monthly_rets_glp = _month_end_returns(df_glp_prices)
monthly_rets_tri = _month_end_returns(df_tri_prices)
monthly_rets = (
    _month_end_returns(df_prices)
    .join(monthly_rets_glp)
    .join(monthly_rets_tri)
)

In [None]:
# First two months of the combined return table (Yahoo assets + GLP + TriCrypto).
monthly_rets.head(2)

In [None]:
# First two months of GLP returns on their own.
monthly_rets_glp.head(2)

In [None]:
# First two months of TriCrypto returns on their own.
monthly_rets_tri.head(2)

In [None]:
# The risk-free rates are indexed by monthly periods, so the returns must be
# converted to a monthly period index first; otherwise the join can't work.
# NOTE(review): this rebinds `monthly_rets`, so the cell is meant to be run
# once after the cell that builds the monthly returns.
monthly_rets = monthly_rets.to_period('M').join(rfs)

In [None]:
# Calculate monthly excess returns (asset return minus the risk-free rate).
# Only the base asset columns are processed: skipping 'RF' itself and any
# '<asset> - RF' column left by an earlier run keeps this cell idempotent
# (re-running it no longer creates '<asset> - RF - RF' columns, which would
# end up as duplicated asset names below).
base_cols = [
    col for col in monthly_rets.columns
    if col != 'RF' and not col.endswith(' - RF')
]
for col in base_cols:
    monthly_rets[col + ' - RF'] = monthly_rets[col] - monthly_rets['RF']

# Ensure all assets have the same months for a fair comparison: drop every
# month in which any column is missing, then keep just the excess columns.
excess_cols = [col + ' - RF' for col in base_cols]
excess_monthly_rets = monthly_rets.dropna().loc[:, excess_cols].copy()
# remove ' - RF' from the column names for better display
excess_monthly_rets.columns = base_cols
excess_monthly_rets.head()

In [None]:
# Most recent months of excess returns that survived the dropna above.
excess_monthly_rets.tail()

In [None]:
# report the first and last month covered, and how many months that is
first_month = excess_monthly_rets.index.min().strftime('%Y-%m')
last_month = excess_monthly_rets.index.max().strftime('%Y-%m')
print('Data period: ', first_month, '~', last_month)
print("Number of months:", len(excess_monthly_rets))

## Output Tables

In [None]:
# Build the Beta, Sharpe Ratio and Excess Return (Ann) tables from the
# monthly excess returns.
#   - SP500 is treated as the benchmark
#   - GLP and TriCrypto yields are excluded
market = 'SP500'
tokens = excess_monthly_rets.columns

# beta of every asset versus the benchmark, smallest first
betas = [
    calc_beta(excess_monthly_rets, token, market).round(3)
    for token in tokens
]
df_betas = pd.Series(betas, index=tokens).sort_values().to_frame(name='Beta')

# mean over standard deviation of the monthly excess returns, largest first
avg_excess = excess_monthly_rets.mean()
vol_excess = excess_monthly_rets.std()
sharpe_ratios = (avg_excess / vol_excess).round(3)
df_sharpes = sharpe_ratios.sort_values(ascending=False).to_frame(name='Sharpe Ratio')

# compound the monthly excess returns, annualize, and express in percent
tot_ret = (excess_monthly_rets + 1).prod() - 1
dur_years = len(excess_monthly_rets) / 12
ann_excess_rets = annualize_tot_ret(tot_ret, dur_years).round(3) * 100
df_ann_excess_rets = (
    ann_excess_rets
    .sort_values(ascending=False)
    .to_frame(name='Excess Return (Ann)')
)

In [None]:
# Sharpe ratios, rendered with three decimals.
df_sharpes.style.format(precision=3)

In [None]:
# Annualized excess returns, rendered with one decimal and a trailing '%'.
df_ann_excess_rets.style.format({'Excess Return (Ann)': '{:,.1f}%'.format})

In [None]:
# Betas versus the benchmark, rendered with three decimals.
df_betas.style.format(precision=3)