Mean Variance Optimization

1) Summary Stats

In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load data
path = '../data/multi_asset_etf_data.xlsx'
descriptions_df = pd.read_excel(path, sheet_name='descriptions')
prices_df = pd.read_excel(path, sheet_name='prices')
total_returns_df = pd.read_excel(path, sheet_name='total returns')
excess_returns_df = pd.read_excel(path, sheet_name='excess returns', index_col=0)

# Summary statistics (annualized returns and volatilities)
summary_stats = excess_returns_df.describe().iloc[1:3]
summary_stats.loc['mean'] *= 12
summary_stats.loc['std'] *= 12**0.5
summary_stats.loc['sharpe'] = summary_stats.loc['mean'] / summary_stats.loc['std']
summary_stats



Unnamed: 0,BWX,DBC,EEM,EFA,HYG,IEF,IYR,PSP,QAI,SPY,TIP
mean,-0.011888,-0.009086,0.02696,0.055037,0.037356,0.013939,0.077912,0.092851,0.014959,0.126983,0.016844
std,0.081671,0.168455,0.17994,0.152203,0.077289,0.063197,0.169585,0.215238,0.049007,0.143066,0.051258
sharpe,-0.145563,-0.053935,0.149829,0.361605,0.483335,0.220561,0.459426,0.431386,0.305241,0.887578,0.328618


SPY has highest shapre, BMX is lowest.

2) Descriptive Analytics

In [33]:
# Correlation matrix
corr = excess_returns_df.corr()
corr

Unnamed: 0,BWX,DBC,EEM,EFA,HYG,IEF,IYR,PSP,QAI,SPY,TIP
BWX,1.0,0.233222,0.638705,0.60062,0.620946,0.568903,0.562599,0.55201,0.662425,0.466152,0.680001
DBC,0.233222,1.0,0.516908,0.521447,0.471924,-0.300523,0.29004,0.467111,0.494467,0.448389,0.114379
EEM,0.638705,0.516908,1.0,0.82077,0.694174,0.020477,0.589003,0.761796,0.79113,0.699939,0.379465
EFA,0.60062,0.521447,0.82077,1.0,0.790742,0.018537,0.705926,0.910596,0.863957,0.864822,0.386677
HYG,0.620946,0.471924,0.694174,0.790742,1.0,0.180087,0.739567,0.814182,0.811161,0.794782,0.541758
IEF,0.568903,-0.300523,0.020477,0.018537,0.180087,1.0,0.297047,0.017486,0.176287,-0.005053,0.745025
IYR,0.562599,0.29004,0.589003,0.705926,0.739567,0.297047,1.0,0.75381,0.718397,0.760646,0.590805
PSP,0.55201,0.467111,0.761796,0.910596,0.814182,0.017486,0.75381,1.0,0.870673,0.890719,0.415854
QAI,0.662425,0.494467,0.79113,0.863957,0.811161,0.176287,0.718397,0.870673,1.0,0.862479,0.527724
SPY,0.466152,0.448389,0.699939,0.864822,0.794782,-0.005053,0.760646,0.890719,0.862479,1.0,0.393647


In [41]:
corr_stack = corr.stack().reset_index()
corr_stack.columns = ['Variable1', 'Variable2', 'Correlation']

corr_stack = corr_stack[corr_stack['Variable1'] != corr_stack['Variable2']]

corr_stack['Pairs'] = corr_stack.apply(
    lambda x: tuple(sorted([x['Variable1'], x['Variable2']])),
    axis=1
)
corr_unique = corr_stack.drop_duplicates(subset='Pairs')

max_corr = corr_unique.loc[corr_unique['Correlation'].idxmax()]
min_corr = corr_unique.loc[corr_unique['Correlation'].idxmin()]

print("Maximum Correlation:")
print(f"{max_corr['Variable1']} and {max_corr['Variable2']} with correlation {max_corr['Correlation']}")

print("\nMinimum Correlation:")
print(f"{min_corr['Variable1']} and {min_corr['Variable2']} with correlation {min_corr['Correlation']}")

Maximum Correlation:
EFA and PSP with correlation 0.9105956995777845

Minimum Correlation:
DBC and IEF with correlation -0.30052272295503085


TIPS outperforms both domestic and foreign bonds.

3) MV Frontier

In [43]:
# Mean and covariance matrix
mu = summary_stats.loc['mean'].values
cov = excess_returns_df.cov().values

# Tangency portfolio weights
w_t = (np.linalg.inv(cov) @ mu) / (np.ones(len(mu)) @ np.linalg.inv(cov) @ mu)
print(w_t)

summary_stats.loc['weights'] = w_t
summary_stats

[-21.2925475    0.57057725   3.11848486  -5.10214639   4.54813494
  35.0128627   -7.2712638   -3.2546352  -43.35898924  39.28229409
  -1.2527717 ]


Unnamed: 0,BWX,DBC,EEM,EFA,HYG,IEF,IYR,PSP,QAI,SPY,TIP
mean,-0.011888,-0.009086,0.02696,0.055037,0.037356,0.013939,0.077912,0.092851,0.014959,0.126983,0.016844
std,0.081671,0.168455,0.17994,0.152203,0.077289,0.063197,0.169585,0.215238,0.049007,0.143066,0.051258
sharpe,-0.145563,-0.053935,0.149829,0.361605,0.483335,0.220561,0.459426,0.431386,0.305241,0.887578,0.328618
weights,-21.292548,0.570577,3.118485,-5.102146,4.548135,35.012863,-7.271264,-3.254635,-43.358989,39.282294,-1.252772


The SPY is being shorted even though having a high sharpe, which reflects the importance of correlation on the weighing of the tangency portfolio rather than individual performance.

In [23]:
# Tangency portfolio performance
tangency_returns = excess_returns_df.values @ w_t
print('mean returns of tangency portfolio:', tangency_returns.mean() * 12)
print('std of tangency portfolio:', tangency_returns.std() * 12**0.5)
print('sharpe of tangency portfolio:', tangency_returns.mean() / tangency_returns.std())

mean returns of tangency portfolio: -0.5272759704595076
std of tangency portfolio: 0.3275102374715194
sharpe of tangency portfolio: -0.46475329417525574


4) TIPS

In [24]:
# Tangency portfolio without TIPS
w_t_no_tips = (np.linalg.inv(cov[:-1, :-1]) @ mu[:-1]) / (np.ones(len(mu) - 1) @ np.linalg.inv(cov[:-1, :-1]) @ mu[:-1])
tangent_returns_no_tips = excess_returns_df.iloc[:, :-1].values @ w_t_no_tips
print('mean returns of tangency portfolio without tips:', tangent_returns_no_tips.mean() * 12)
print('std of tangency portfolio without tips:', tangent_returns_no_tips.std() * 12**0.5)
print('sharpe of tangency portfolio without tips:', tangent_returns_no_tips.mean() / tangent_returns_no_tips.std())

# Modified TIPS (adjusting by +0.0012)
modified_tip = excess_returns_df.copy()
modified_tip['TIP'] = excess_returns_df['TIP'] + 0.0012
mod_cov = modified_tip.cov().values
w_t_modified_tip = (np.linalg.inv(mod_cov) @ mu) / (np.ones(len(mu)) @ np.linalg.inv(mod_cov) @ mu)
tangency_returns_modified_tip = modified_tip.values @ w_t_modified_tip
print('mean returns of tangency portfolio with modified tips:', tangency_returns_modified_tip.mean() * 12)
print('std of tangency portfolio with modified tips:', tangency_returns_modified_tip.std() * 12**0.5)
print('sharpe of tangency portfolio with modified tips:', tangency_returns_modified_tip.mean() / tangency_returns_modified_tip.std())

mean returns of tangency portfolio without tips: -0.5808055656351995
std of tangency portfolio without tips: 0.36101926924280414
sharpe of tangency portfolio without tips: -0.4644187696263792
mean returns of tangency portfolio with modified tips: -0.5199256931396691
std of tangency portfolio with modified tips: 0.3275102374715159
sharpe of tangency portfolio with modified tips: -0.4582745889262435


TIPS does expand the investment opportunity set, but not by a large margin, so it is not clear if it would be worth it to branch into the new asset.

Allocations

In [25]:
# Equally weighted portfolio
n = excess_returns_df.mean().sum() / 0.01
equally_weighted_weights = np.ones(len(mu)) * 1 / n
print(equally_weighted_weights)

[0.27157523 0.27157523 0.27157523 0.27157523 0.27157523 0.27157523
 0.27157523 0.27157523 0.27157523 0.27157523 0.27157523]


In [26]:
# Risk parity weights
risk_parity_weights = np.ones(len(mu)) / np.diag(cov)
scalar = 0.01 / (risk_parity_weights @ excess_returns_df.mean())
risk_parity_weights = scalar * risk_parity_weights
print(risk_parity_weights)

[0.52287903 0.12290508 0.10771708 0.15055401 0.58385707 0.87327612
 0.12127347 0.07528382 1.45220222 0.17039729 1.32746067]


In [27]:
# Regularized portfolio
diag_cov = np.diag(np.diag(cov))
scaled_covar = (cov + diag_cov) / 2
reg_weights = np.linalg.inv(scaled_covar) @ excess_returns_df.mean()
scalar = 0.01 / (reg_weights @ excess_returns_df.mean())
reg_weights = scalar * reg_weights
print(reg_weights)

[-0.67537364 -0.13619519 -0.06616486  0.03730235  0.31678093  0.39350632
  0.08027736  0.04954685 -0.15650017  0.62371199  0.32242934]


In [28]:
# Compare portfolio statistics
scalar = 0.01 / (w_t @ excess_returns_df.mean())
scaled_w_t = scalar * w_t
methods = [scaled_w_t, equally_weighted_weights, risk_parity_weights, reg_weights]
index = ['tangency', 'equally weighted', 'risk parity', 'regularized']

In [29]:
# Create DataFrame with weights
weights_df = pd.DataFrame(methods, columns=excess_returns_df.columns, index=index)
weights_df

Unnamed: 0,BWX,DBC,EEM,EFA,HYG,IEF,IYR,PSP,QAI,SPY,TIP
tangency,-0.621364,0.025317,0.140124,-0.120777,0.189482,1.065661,-0.199617,-0.063041,-1.643297,1.116095,-0.116167
equally weighted,0.271575,0.271575,0.271575,0.271575,0.271575,0.271575,0.271575,0.271575,0.271575,0.271575,0.271575
risk parity,0.522879,0.122905,0.107717,0.150554,0.583857,0.873276,0.121273,0.075284,1.452202,0.170397,1.327461
regularized,-0.675374,-0.136195,-0.066165,0.037302,0.316781,0.393506,0.080277,0.049547,-0.1565,0.623712,0.322429


In [30]:
# Print performance for each method
for weight, method in zip(methods, index):
    print(method)
    print('mean returns:', excess_returns_df.mean().values @ weight)
    print('volatility:', np.sqrt(weight @ cov @ weight))
    print('sharpe:', (excess_returns_df.mean().values @ weight) / np.sqrt(weight @ cov @ weight))
    print()

tangency
mean returns: 0.009999999999999998
volatility: 0.021582693589899334
sharpe: 0.46333419683444804

equally weighted
mean returns: 0.010000000000000002
volatility: 0.08415666401309195
sharpe: 0.11882600287535564

risk parity
mean returns: 0.009999999999999998
volatility: 0.08934924818849498
sharpe: 0.11192035974274314

regularized
mean returns: 0.010000000000000002
volatility: 0.02961932718711928
sharpe: 0.3376173920773176



Tangency has the highest Sharpe, as it features the lowest volatility. Regularization comes in next but has a high volatility. Both equally weighted and risky parity have lower Sharpe ratios.

