In [3]:
import pandas as pd 
import numpy as np 


# ProShares workbook: each sheet is read with its first column as the index.
hf_series = pd.read_excel('./../data/proshares_analysis_data.xlsx',sheet_name='hedge_fund_series',index_col=0)
merrill_factors = pd.read_excel('./../data/proshares_analysis_data.xlsx',sheet_name='merrill_factors',index_col=0)
other_data = pd.read_excel('./../data/proshares_analysis_data.xlsx',sheet_name='other_data',index_col=0)

# Number of periods per year used when annualizing statistics below.
annualization_factor= 12

# `data` is the working return panel for the following cells: the ETF
# excess-return sheet indexed by its 'Date' column. (A previous
# `data = hf_series` assignment was dropped because it was overwritten here
# before ever being used.)
etfs = pd.read_excel("./../data/multi_asset_etf_data.xlsx", sheet_name="excess returns")
data = etfs.set_index('Date')


## Portfolio

In [1]:
import numpy as np 

def tangency_weights(returns,dropna=True,scale_cov=1, name = 'Tangency'):
    """
    Tangency-portfolio weights, normalised so that they sum to one.

    Inputs:
        returns - DataFrame of returns, one column per asset.
        dropna - when True, rows containing any NaN are discarded before the
                 mean and covariance are estimated.
        scale_cov - blend between the full sample covariance (1) and the
                    matrix holding only its diagonal (0).
        name - prefix of the output column label.
    Output:
        One-column DataFrame labelled '<name> Weights', indexed by asset.
    """
    sample = returns.dropna() if dropna else returns

    sigma_full = sample.cov()
    sigma_diag = np.diag(np.diag(sigma_full))
    sigma = scale_cov * sigma_full + (1-scale_cov) * sigma_diag

    # Solve sigma @ x = mean rather than forming the inverse explicitly.
    unscaled = np.linalg.solve(sigma, sample.mean())
    scaled = unscaled / unscaled.sum()

    return pd.DataFrame(scaled, index=sample.columns, columns=[f'{name} Weights'])

def equal_weights(returns): 
    """
    Equally weighted portfolio: every column of `returns` receives 1/N.

    Output:
        One-column DataFrame ('EWP Weights') indexed by the columns of `returns`.
    """
    n_assets = len(returns.columns)
    return pd.DataFrame(1/n_assets, index=returns.columns, columns=['EWP Weights'])

def gmv_weights(returns):
    """
    Global minimum-variance weights, normalised so that they sum to one.

    The weights are proportional to inv(Sigma) @ 1. Any constant scaling of
    the covariance matrix (such as annualizing it) cancels in the
    normalisation, so the sample covariance is used as-is and the function
    does not depend on any notebook-level setting.

    Inputs:
        returns - DataFrame of returns, one column per asset.
    Output:
        One-column DataFrame ('GMV Weights') indexed by the columns of `returns`.
    """
    cov = returns.cov()
    ones = np.ones(len(returns.columns))

    # Solve Sigma @ x = 1 instead of forming the inverse explicitly.
    unscaled = np.linalg.solve(cov, ones)
    gmv_tot = unscaled / unscaled.sum()

    return pd.DataFrame(gmv_tot, index=returns.columns, columns=['GMV Weights'])

def risk_parity_weights(returns): 
    """
    Inverse-variance weights: each asset gets 1 / (sample variance of its column).

    The weights are returned as computed; they are not rescaled to sum to one.

    Output:
        One-column DataFrame ('RP Weights') indexed by asset.
    """
    inverse_variance = {}
    for asset, variance in returns.var().to_dict().items():
        inverse_variance[asset] = 1 / variance
    return pd.DataFrame.from_dict(inverse_variance, orient='index', columns=['RP Weights'])

def calc_mv_portfolio(mean_rets, cov_matrix, target=None):
    """
    Mean-variance weights computed directly from a mean vector and covariance matrix.

    Inputs:
        mean_rets - mean return per asset (Series or array-like of length N).
        cov_matrix - N x N covariance matrix (DataFrame or array-like).
        target - desired portfolio mean return. When None, the tangency
                 portfolio is returned.
    Output:
        One-column DataFrame of weights summing to one: 'Tangency Weights'
        when `target` is None, otherwise 'MV Weights' (the combination of the
        tangency and global-minimum-variance portfolios whose mean equals
        `target`). Indexed like `mean_rets` when it is a Series.
    Raises:
        ValueError - if `cov_matrix` is not N x N, or if the tangency and GMV
                     portfolios have the same mean so no combination can hit
                     the target.
    """
    labels = mean_rets.index if isinstance(mean_rets, pd.Series) else None
    mu = np.asarray(mean_rets, dtype=float).ravel()
    sigma = np.asarray(cov_matrix, dtype=float)
    if sigma.shape != (mu.size, mu.size):
        raise ValueError('cov_matrix must be square with one row/column per entry of mean_rets')

    w_tan = np.linalg.solve(sigma, mu)
    w_tan = w_tan / w_tan.sum()
    if target is None:
        return pd.DataFrame(w_tan, index=labels, columns=['Tangency Weights'])

    w_gmv = np.linalg.solve(sigma, np.ones(mu.size))
    w_gmv = w_gmv / w_gmv.sum()

    mu_tan = mu @ w_tan
    mu_gmv = mu @ w_gmv
    if mu_tan == mu_gmv:
        raise ValueError('tangency and GMV portfolios have the same mean; target cannot be reached')

    # Two-fund separation: delta is the share placed in the tangency portfolio.
    delta = (target - mu_gmv) / (mu_tan - mu_gmv)
    return pd.DataFrame(delta * w_tan + (1 - delta) * w_gmv, index=labels, columns=['MV Weights'])

def mv_portfolio(target_ret, tot_returns):
    """
    Mean-variance portfolio hitting `target_ret`, built from the tangency and GMV portfolios.

    Inputs:
        target_ret - desired portfolio mean return, in the same units as `tot_returns`.
        tot_returns - DataFrame of returns, one column per asset.
    Output:
        DataFrame indexed by asset with columns 'MV Weights',
        'tangency weights' and 'GMV weights'.
    """
    # Compute each weight vector once and reuse it.
    w_tan = tangency_weights(tot_returns, scale_cov = 1)
    w_gmv = gmv_weights(tot_returns)

    mean_rets = tot_returns.mean()
    mu_tan = (mean_rets @ w_tan).iloc[0]
    mu_gmv = (mean_rets @ w_gmv).iloc[0]

    # delta is the share placed in the tangency portfolio.
    delta = (target_ret - mu_gmv)/(mu_tan - mu_gmv)
    mv_weights = delta * w_tan.values.ravel() + (1-delta) * w_gmv.values.ravel()

    # Index by the assets the weights were computed for, so the lengths always match.
    MV = pd.DataFrame(index = w_tan.index, data = mv_weights, columns = ['MV Weights'] )
    MV['tangency weights'] = w_tan.values.ravel()
    MV['GMV weights'] = w_gmv.values.ravel()

    return MV


def mv_portfolio_excess_returns(target_ret, ex_ret):
    """
    Mean-variance weights on excess returns, scaled to a target mean excess return.

    The tangency weights are rescaled by
        delta = (1' inv(Sigma) mu) / (mu' inv(Sigma) mu) * target_ret
    so that the resulting portfolio's mean excess return equals `target_ret`.
    The weights need not sum to one.

    Inputs:
        target_ret - desired mean excess return, in the units of `ex_ret`.
        ex_ret - DataFrame of excess returns. A 'date' column, if present,
                 is moved to the index first; the input frame is not modified.
    Output:
        One-column DataFrame ('MV Weights') indexed by asset.
    """
    rets = ex_ret.set_index('date') if 'date' in ex_ret.columns else ex_ret

    mu_tilde = rets.mean()
    Sigma_adj = rets.cov()
    N = Sigma_adj.shape[0]
    ones = np.ones(N)

    # inv(Sigma) @ mu via a linear solve; every term below is built from it.
    sigma_inv_mu = np.linalg.solve(Sigma_adj, mu_tilde)

    tan_wts = sigma_inv_mu / (ones @ sigma_inv_mu)
    delta_tilde = ((ones @ sigma_inv_mu)/(mu_tilde.values @ sigma_inv_mu)) * target_ret
    omega_star = delta_tilde * tan_wts

    return pd.DataFrame(omega_star, index=mu_tilde.index, columns=['MV Weights'])
    

In [5]:
# Rescale the tangency weights so the portfolio's mean return hits the target

target = 0.0025
tan_weights = tangency_weights(data)
adjustment_factor = target / (data.mean() @ tan_weights.values)
tan_weights = tan_weights * adjustment_factor
tan_weights

Unnamed: 0,Tangency Weights
BWX,-0.155341
DBC,0.006329
EEM,0.035031
EFA,-0.030194
HYG,0.047371
IEF,0.266415
IYR,-0.049904
PSP,-0.01576
QAI,-0.410824
SPY,0.279024


# Forecasting Returns

In [None]:
def OOS_strat(df, factors, start):
    """
    Expanding-window out-of-sample forecasts of `df` from `factors`.

    For every position i >= start, an OLS of df on a constant and the factors
    is fitted on rows [0, i) and used to predict row i. The baseline forecast
    is the mean of df over the same window.

    NOTE(review): a later cell defines another OOS_strat(df, factors, start,
    weight); once both cells have run, only the later definition is bound.

    Inputs:
        df - series to forecast (assumed to be a pandas Series aligned
             row-by-row with `factors` - TODO confirm).
        factors - regressors; a constant is added here.
        start - first row position that is forecast out of sample.
    Output:
        Tuple (oos_r2, reg, predictions_df, null_predictions_df):
            oos_r2 - 1 - RSS/TSS of the model forecasts against the baseline.
            reg - fitted OLS results of the last window.
            predictions_df - model forecasts indexed like the forecast rows.
            null_predictions_df - DataFrame indexed by 'date' with column
                                  'Baseline Forecasts'.
    Raises:
        ValueError - if `start` leaves no rows to forecast.
    """
    y = df
    X = sm.add_constant(factors)

    forecast_err, null_err = [], []
    oos_predictions, null_forecasts, forecast_dates = [], [], []
    reg = None

    for i in range(max(start, 0), len(df.index)):
        currX = X.iloc[:i]
        currY = y.iloc[:i]
        reg = sm.OLS(currY, currX, missing = 'drop').fit()
        null_forecast = currY.mean()
        reg_predict = reg.predict(X.iloc[[i]])
        actual = y.iloc[[i]]

        oos_predictions.append(reg_predict)
        null_forecasts.append(null_forecast)
        forecast_dates.append(reg_predict.index[0])
        forecast_err.append(reg_predict.values - actual)
        null_err.append(null_forecast - actual)

    if reg is None:
        raise ValueError('start leaves no observations to forecast out of sample')

    RSS = (np.array(forecast_err)**2).sum()
    TSS = (np.array(null_err)**2).sum()

    # Keep every forecast: de-duplicating here would drop dates whose
    # predicted values happen to coincide.
    predictions_df = pd.concat(oos_predictions)
    null_predictions_df = pd.DataFrame({'Baseline Forecasts': null_forecasts},
                                       index = pd.Index(forecast_dates, name = 'date'))

    return ((1 - RSS/TSS),reg,predictions_df,null_predictions_df)

# Factor Model / Regression

In [15]:
import statsmodels.api as sm 
# Regress BWX on every other column of `data`, with an intercept added.
y = data['BWX'] 
x = data.drop(columns = ['BWX'])
model = sm.OLS(y, sm.add_constant(x)).fit()
model.params

const   -0.001805
DBC      0.018777
EEM      0.159118
EFA      0.140758
HYG      0.149834
IEF      0.655677
IYR     -0.012827
PSP      0.009463
QAI      0.304420
SPY     -0.170138
TIP      0.020051
dtype: float64

In [None]:
model.params['const'] * 12 # annualize the per-period alpha (12 periods per year)

In [35]:
# Label of the regressand series; a later cell uses it to select that row of the report.
y.name

'BWX'

In [36]:
import statsmodels.api as sm
def time_series_regression(portfolio, factors, annualization_factor, FF3F = False, resid = False):
    """
    Time-series OLS of each portfolio on a constant and the factors.

    NOTE(review): a later cell defines another time_series_regression with a
    different signature; once both cells have run, only the later one is bound.

    Inputs:
        portfolio - Series (one asset) or DataFrame (one column per asset) of returns.
        factors - regressors; a constant is added here.
        annualization_factor - number of periods per year.
        FF3F - when True, the second and third factor loadings are also
               reported as 'Size beta' and 'Value beta'.
        resid - when True, return the regression residuals instead of the report.
    Output:
        resid False: DataFrame with one row per portfolio and columns
            alpha_hat (annualized), beta_mkt (loading on the first factor),
            [Size beta, Value beta when FF3F], info_ratio, treynor_ratio,
            R-squared, Tracking Error.
        resid True: DataFrame of residuals, one column per portfolio.
    """
    if isinstance(portfolio, pd.Series):
        portfolio = portfolio.to_frame()

    rhs = sm.add_constant(factors)
    report_rows = []
    residuals = {}

    for portf in portfolio.columns:
        lhs = portfolio[portf]
        res = sm.OLS(lhs, rhs, missing='drop').fit()

        alpha = res.params['const']
        # Positional access: the constant is first, the factors follow in order.
        beta_mkt = res.params.iloc[1]
        resid_vol = res.resid.std()

        row = {'alpha_hat': alpha * annualization_factor, 'beta_mkt': beta_mkt}
        if FF3F:
            row['Size beta'] = res.params.iloc[2]
            row['Value beta'] = res.params.iloc[3]
        row['info_ratio'] = np.sqrt(annualization_factor) * alpha / resid_vol
        row['treynor_ratio'] = annualization_factor * lhs.mean() / beta_mkt
        row['R-squared'] = res.rsquared
        row['Tracking Error'] = resid_vol * np.sqrt(annualization_factor)

        report_rows.append(row)
        residuals[portf] = res.resid

    if resid:
        return pd.DataFrame(residuals, columns=portfolio.columns)

    return pd.DataFrame(report_rows, index=portfolio.columns)

# Run the regression report for the single series `y` and show only its row.
time_series_regression(y, x, annualization_factor=12, FF3F = False, resid = False).loc[y.name,:]

alpha_hat        -0.021657
beta_mkt          0.018777
info_ratio       -0.537488
treynor_ratio    -0.633118
R-squared         0.756595
Tracking Error    0.040293
Name: BWX, dtype: float64

In [41]:
# Multi-asset case: regress BWX and DBC on the remaining columns of `data`.
y = data[['BWX', 'DBC']]
x = data.drop(columns = ['DBC','BWX'])
time_series_regression(y, x, annualization_factor=12, FF3F = False, resid = False)


Unnamed: 0,alpha_hat,beta_mkt,info_ratio,treynor_ratio,R-squared,Tracking Error
BWX,-0.02158,0.16029,-0.534765,-0.074167,0.755855,0.040355
DBC,0.0041,0.062411,0.034663,-0.145578,0.506872,0.118294


# Performance Metrics

In [None]:
def portfolio_metrics_basic(returns: pd.DataFrame, annualization_factor: float) -> pd.DataFrame: 
    '''
    Annualized mean return, volatility and Sharpe ratio for each column of `returns`.

    The mean is scaled by `annualization_factor` and the standard deviation by
    its square root; the Sharpe ratio is their quotient.
    '''
    annual_mean = returns.mean() * annualization_factor
    annual_vol = returns.std() * np.sqrt(annualization_factor)
    sharpe = annual_mean / annual_vol

    # Column labels keep their existing spelling so downstream lookups still match.
    return pd.DataFrame(
        {
            'Annnualized Return': annual_mean.values,
            'Annnualized Vol': annual_vol.values,
            'Annnualized SR': sharpe.values,
        },
        index=returns.columns,
    )


In [3]:
def performance_summary(return_data, quantile, annualization_factor):
    """ 
        Performance statistics for each column of a return panel.
        Inputs: 
            return_data - DataFrame of returns, one column per asset/strategy, indexed by date.
            quantile - tail probability used for the VaR and CVaR columns (e.g. 0.05).
            annualization_factor - number of periods per year.
        Output:
            summary_stats - DataFrame with one row per column of return_data: annualized Mean,
                            Volatility and Sharpe Ratio; Skewness, Excess Kurtosis, VaR and CVaR
                            at `quantile`, Min and Max of the per-period returns; and the Max
                            Drawdown with its Peak, Bottom and Recovery dates.
    """
    summary_stats = (return_data.mean() * annualization_factor).to_frame('Mean')
    summary_stats['Volatility'] = return_data.std() * np.sqrt(annualization_factor)
    summary_stats['Sharpe Ratio'] = summary_stats['Mean']/summary_stats['Volatility']

    summary_stats['Skewness'] = return_data.skew()
    summary_stats['Excess Kurtosis'] = return_data.kurtosis()

    # CVaR is the mean of the returns at or below the VaR level.
    var_level = return_data.quantile(quantile, axis = 0)
    summary_stats[f'VaR ({quantile})'] = var_level
    summary_stats[f'CVaR ({quantile})'] = return_data.where(return_data <= var_level).mean()
    summary_stats['Min'] = return_data.min()
    summary_stats['Max'] = return_data.max()

    # Drawdowns are measured on a wealth index that compounds the returns.
    wealth_index = 1000*(1+return_data).cumprod()
    previous_peaks = wealth_index.cummax()
    drawdowns = (wealth_index - previous_peaks)/previous_peaks

    peak_dates, recovery_dates = [], []
    for col in wealth_index.columns:
        trough = drawdowns[col].idxmin()
        run_up = previous_peaks[col][:trough]
        peak_dates.append(run_up.idxmax())

        # Recovery is the first date from the trough onward on which wealth is
        # back at the pre-trough high (missing if that never happens).
        after_trough = wealth_index[col][trough:]
        recovered = after_trough[after_trough >= run_up.max()]
        recovery_dates.append(recovered.index.min())

    summary_stats['Max Drawdown'] = drawdowns.min()
    summary_stats['Peak'] = peak_dates
    summary_stats['Bottom'] = drawdowns.idxmin()
    summary_stats['Recovery'] = recovery_dates

    return summary_stats
# Summary for every column of `data`: 0.05 quantile for VaR/CVaR, 12 periods per year.
performance_summary(data, 0.05, 12)


Unnamed: 0,Mean,Volatility,Sharpe Ratio,Skewness,Excess Kurtosis,VaR (0.05),CVaR (0.05),Min,Max,Max Drawdown,Peak,Bottom,Recovery
BWX,-0.011888,0.081671,-0.145563,-0.184804,0.714401,-0.043311,-0.052614,-0.069406,0.077951,-0.339522,2020-12-31,2023-10-31,NaT
DBC,-0.009086,0.168455,-0.053935,-0.49171,0.708068,-0.083417,-0.113114,-0.177563,0.102184,-0.669081,2011-04-30,2020-04-30,NaT
EEM,0.02696,0.17994,0.149829,-0.104661,1.277024,-0.078424,-0.111063,-0.179084,0.162806,-0.3664,2021-06-30,2022-10-31,NaT
EFA,0.055037,0.152203,0.361605,-0.266527,0.967185,-0.073256,-0.096643,-0.145228,0.142902,-0.276203,2021-08-31,2022-09-30,2024-05-31
HYG,0.037356,0.077289,0.483335,-0.48279,4.299609,-0.027397,-0.049028,-0.10444,0.085044,-0.152857,2021-12-31,2022-09-30,NaT
IEF,0.013939,0.063197,0.220561,-0.029644,0.098295,-0.030108,-0.038691,-0.047632,0.046105,-0.267532,2020-07-31,2023-10-31,NaT
IYR,0.077912,0.169585,0.459426,-0.458684,1.478125,-0.067934,-0.100602,-0.200485,0.132024,-0.35158,2021-12-31,2023-10-31,NaT
PSP,0.092851,0.215238,0.431386,-0.641633,2.574721,-0.100326,-0.14001,-0.27888,0.178449,-0.452017,2021-10-31,2022-09-30,NaT
QAI,0.014959,0.049007,0.305241,-0.538754,2.018508,-0.019026,-0.031692,-0.060508,0.036397,-0.137591,2021-06-30,2022-09-30,NaT
SPY,0.126983,0.143066,0.887578,-0.40758,0.858827,-0.065493,-0.085439,-0.129032,0.126767,-0.239572,2021-12-31,2022-09-30,2024-02-29


# Factor Model

In [None]:
def time_series_regression(portfolio, factors, multiple_factors = False, resid = False, annualization_factor = 12):
    """
    Time-series OLS of each portfolio on a constant and one or more factors.

    NOTE(review): this redefines time_series_regression from an earlier cell
    with a different signature; once this cell runs, the earlier version is
    no longer reachable under this name.

    Inputs:
        portfolio - DataFrame (one column per asset) or Series of returns.
        factors - Series (single factor) or DataFrame (any number of factors);
                  a constant is added here.
        multiple_factors - kept for backward compatibility. Factor loadings are
                           now reported for every factor supplied, whatever
                           this flag says.
        resid - when True, return the regression residuals instead of the report.
        annualization_factor - number of periods per year (default 12).
    Output:
        resid False: DataFrame with one row per portfolio and columns
            alpha_hat (annualized), '<factor> beta' for each factor,
            info_ratio, treynor_ratio (relative to the first factor),
            R-squared, Tracking Error.
        resid True: DataFrame of residuals, one column per portfolio.
    """
    if isinstance(portfolio, pd.Series):
        portfolio = portfolio.to_frame()

    rhs = sm.add_constant(factors)
    factor_names = [col for col in rhs.columns if col != 'const']

    report_rows = []
    residuals = {}

    for portf in portfolio.columns:
        lhs = portfolio[portf]
        res = sm.OLS(lhs, rhs, missing='drop').fit()

        alpha = res.params['const']
        resid_vol = res.resid.std()

        row = {'alpha_hat': alpha * annualization_factor}
        for factor in factor_names:
            row[f'{factor} beta'] = res.params[factor]
        row['info_ratio'] = np.sqrt(annualization_factor) * alpha / resid_vol
        row['treynor_ratio'] = annualization_factor * lhs.mean() / res.params[factor_names[0]]
        row['R-squared'] = res.rsquared
        row['Tracking Error'] = resid_vol * np.sqrt(annualization_factor)

        report_rows.append(row)
        residuals[portf] = res.resid

    if resid:
        return pd.DataFrame(residuals, columns=portfolio.columns)

    return pd.DataFrame(report_rows, index=portfolio.columns)

# Replication and OOS Testing 

In [None]:
def OOS_r2(df, factors, start, target_col = 'SPY'):
    """
    Out-of-sample R-squared of an expanding-window OLS forecast.

    For every row position i >= start, the target is regressed on a constant
    and the factors using rows [0, i); the fit predicts row i and is compared
    with the baseline forecast, the mean of the target over the same window.

    Inputs:
        df - DataFrame containing the target column.
        factors - regressors; a constant is added here.
        start - first row position that is forecast out of sample.
        target_col - column of `df` to forecast (default 'SPY').
    Output:
        Tuple (oos_r2, reg): 1 - RSS/TSS, and the fitted OLS results of the
        last window.
    Raises:
        ValueError - if `start` leaves no rows to forecast.
    """
    y = df[target_col]
    X = sm.add_constant(factors)

    forecast_err, null_err = [], []
    reg = None

    for i in range(max(start, 0), len(df.index)):
        currX = X.iloc[:i]
        currY = y.iloc[:i]
        reg = sm.OLS(currY, currX, missing = 'drop').fit()
        null_forecast = currY.mean()
        reg_predict = reg.predict(X.iloc[[i]])
        actual = y.iloc[[i]]
        forecast_err.append(reg_predict - actual)
        null_err.append(null_forecast - actual)

    if reg is None:
        raise ValueError('start leaves no observations to forecast out of sample')

    RSS = (np.array(forecast_err)**2).sum()
    TSS = (np.array(null_err)**2).sum()

    return ((1 - RSS/TSS),reg)

In [None]:
def OOS_strat(df, factors, start, weight, target_col = 'SPY'):
    """
    Returns of a strategy that sizes its position by an out-of-sample forecast.

    For every row position i >= start, the target is regressed on a constant
    and the factors using rows [0, i). The position for row i is
    weight * forecast, and the strategy return is that position times the
    realised target return of row i.

    NOTE(review): this redefines OOS_strat from an earlier cell with a
    different signature; once this cell runs, the earlier version is no
    longer reachable under this name.

    Inputs:
        df - DataFrame containing the target column.
        factors - regressors; a constant is added here.
        start - first row position that is traded out of sample.
        weight - multiplier applied to the forecast to obtain the position.
        target_col - column of `df` that is forecast and traded (default 'SPY').
    Output:
        DataFrame with column 'Strat Returns', indexed by the out-of-sample
        rows of `df` (empty when `start` is past the end of the data).
    """
    returns = []
    y = df[target_col]
    X = sm.add_constant(factors)

    first = max(start, 0)
    for i in range(first, len(df.index)):
        currX = X.iloc[:i]
        currY = y.iloc[:i]
        reg = sm.OLS(currY, currX, missing = 'drop').fit()
        pred = reg.predict(X.iloc[[i]])
        w = pred * weight
        # .iloc avoids the deprecated positional fallback of [0] on a labelled Series.
        returns.append(y.iloc[i] * w.iloc[0])

    df_strat = pd.DataFrame(data = returns, index = df.index[first:], columns = ['Strat Returns'])
    return df_strat

# FX

* Exchange rates must be in log terms, and risk-free rates enter as log(1 + rf).

In [44]:
# Risk-free rates indexed by date, with log(1 + rate) appended for each
# currency under a 'log_' prefix.
risk_free_rates = pd.read_excel('.././data/fx_rf_data.xlsx', sheet_name='risk-free rates').set_index('date')
risk_free_rates = pd.concat([risk_free_rates, np.log(1 + risk_free_rates).add_prefix('log_')], axis=1)
risk_free_rates.head()

Unnamed: 0_level_0,USD,JPY,EUR,GBP,MXN,CHF,log_USD,log_JPY,log_EUR,log_GBP,log_MXN,log_CHF
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2008-01-01,0.044,0.00478,0.0425,0.057,0.075,0.020645,0.043059,0.004769,0.041622,0.055435,0.072321,0.020435
2008-01-02,0.041563,0.00478,0.036,0.05475,0.075,0.020645,0.040722,0.004769,0.035367,0.053304,0.072321,0.020435
2008-01-03,0.043463,0.00478,0.0394,0.056,0.075,0.0215,0.042545,0.004769,0.038644,0.054488,0.072321,0.021272
2008-01-04,0.042188,0.00478,0.04065,0.055,0.075,0.02095,0.041322,0.004769,0.039846,0.053541,0.072321,0.020734
2008-01-07,0.042188,0.00493,0.041,0.0546,0.075,0.0202,0.041322,0.004918,0.040182,0.053162,0.072321,0.019999


In [46]:
# Spot exchange rates indexed by date, with the log of each rate appended
# under a 'log_' prefix.
fx_rates = pd.read_excel('.././data/fx_rf_data.xlsx', sheet_name='exchange rates').set_index('date')
fx_rates = pd.concat([fx_rates, np.log(fx_rates).add_prefix('log_')], axis=1)
fx_rates.head()

Unnamed: 0_level_0,JPY,EUR,GBP,MXN,CHF,log_JPY,log_EUR,log_GBP,log_MXN,log_CHF
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2008-01-01,0.008957,1.4592,1.9864,0.09183,0.8824,-4.71532,0.377888,0.686324,-2.387816,-0.12511
2008-01-02,0.009119,1.4715,1.9808,0.09162,0.8933,-4.697395,0.386282,0.683501,-2.390106,-0.112833
2008-01-03,0.009148,1.475,1.971,0.09187,0.9002,-4.69422,0.388658,0.678541,-2.387381,-0.105138
2008-01-04,0.009209,1.4743,1.974,0.09154,0.9022,-4.687574,0.388183,0.680062,-2.390979,-0.102919
2008-01-07,0.009159,1.4696,1.9704,0.09176,0.8957,-4.693018,0.38499,0.678237,-2.388579,-0.11015


## Performance of the currencies -> is there a premium 
- UIP: the mean excess return on each currency should be 0, since the change in the spot FX rate is explained by the risk-free rate differential.

In [47]:
fx_spot_list = ['log_GBP'
                ,'log_EUR'
                ,'log_CHF'
                ,'log_JPY']

fx_hldg_lst = []
for fx in fx_spot_list:
    # Excess log return of holding the currency: change in the log spot rate,
    # plus the previous row's foreign log risk-free rate, minus the previous
    # row's USD log risk-free rate.
    # NOTE(review): the rates are used exactly as stored in risk_free_rates -
    # confirm they are quoted per period of the data rather than per year.
    fx_hldg_excess_ret = fx_rates[fx] - fx_rates[fx].shift(1) + risk_free_rates[fx].shift(1) - risk_free_rates['log_USD'].shift(1)
    # performance_summary takes (return_data, quantile, annualization_factor);
    # the 0.05 tail quantile matches the other summary in this notebook, and
    # 365 is the annualization factor the original call supplied.
    fx_hldg_summary = performance_summary(fx_hldg_excess_ret.to_frame().dropna(), 0.05, 365)
    fx_hldg_lst.append(fx_hldg_summary)

fx_hldg_perf_summary = pd.concat(fx_hldg_lst)
perf = fx_hldg_perf_summary.T
perf.columns = fx_spot_list 
perf

log_GBP


NameError: name 'performance_summary' is not defined