In [25]:
import numpy as np
import pandas as pd
import seaborn as sns
import cvxpy as cp
import matplotlib.pyplot as plt

### Code to Retrieve, Process Data

In [None]:
# investable universe: one ETF ticker per asset-class segment
universe = pd.DataFrame(
    list({
        'Large Cap US Equity': 'IWB',
        'Small Cap US Equity': 'IWM',
        'Dev Mkts non-US Equity': 'EFA',
        'Emerg Mkts Equity': 'EEM',
        'Global REIT': 'VNQ',
        'Corp Bonds': 'LQD',
        'Short-Term Treasury': 'SHY',
    }.items()),
    columns=['Segment', 'Ticker'],
)

In [None]:
# load the data: one adjusted-close series per ticker, read from `<ticker>_daily.csv`
# - only the two needed columns are read
# - `Date` is parsed into a DatetimeIndex so rows align and sort chronologically
#   (instead of being compared as raw strings)
frames = [
    pd.read_csv(
        f'{ticker}_daily.csv',
        usecols=['Date', 'Adj Close'],
        index_col='Date',
        parse_dates=True,
    )['Adj Close'].rename(ticker)
    for ticker in universe['Ticker']
]

# outer-join the series on date; sort explicitly because the positional
# sampling done later (every k-th row) is only meaningful on chronological rows
prices = pd.concat(frames, axis=1).sort_index()
n_obs = prices.shape[0]

In [None]:
# define frequencies we want to analyze, expressed as a sampling step in
# trading days (convention: 21 trading days per month, 252 per year)
freqs = {
    'daily': 1,
    'weekly': 5,
    'monthly': 21,
    # 3 * monthly == yearly / 4 == 63 (was 62, which agreed with neither)
    'quarterly': 63,
    'yearly': 252,
}

In [None]:
# simple (arithmetic) returns per frequency: keep every `step`-th price row,
# take the percentage change, and drop any row where at least one asset is missing
returns = {
    label: prices.iloc[0:n_obs:step].pct_change().dropna(axis=0, how='any')
    for label, step in freqs.items()
}

In [2]:
# NOTE(review): the helper functions defined below all take `freq` as a
# parameter and do not read this module-level value — confirm it is still needed
freq = 'monthly'
def calc_stats(freq, returns):
    rts = returns[freq]
    period_stats = pd.DataFrame(index=None, columns=rts.columns)
    period_stats.loc['Avg Return', :] = rts.mean()
    period_stats.loc['Std Dev', :] = rts.std()
    period_stats.loc['Return/Risk', :] = period_stats.loc['Avg Return', :] / period_stats.loc['Std Dev', :]
    period_stats.loc['Skewness', :] = rts.skew()
    period_stats.loc['Kurtosis', :] = rts.kurt()
    period_stats.loc['10th Pct', :] = rts.apply(np.percentile, axis=0, q=10)
    period_stats.loc['25th Pct', :] = rts.apply(np.percentile, axis=0, q=25)
    period_stats.loc['50th Pct', :] = rts.apply(np.percentile, axis=0, q=50)
    period_stats.loc['75th Pct', :] = rts.apply(np.percentile, axis=0, q=75)
    period_stats.loc['90th Pct', :] = rts.apply(np.percentile, axis=0, q=90)
    
    stats_styled = period_stats.T.style.format({
        'Avg Return': '{:,.2%}',
        'Std Dev': '{:,.2%}',
        'Return/Risk': '{:,.2}',
        'Skewness': '{:,.2}',
        'Kurtosis': '{:,.2}',
        '10th Pct': '{:,.2%}',
        '25th Pct': '{:,.2%}',
        '50th Pct': '{:,.2%}',
        '75th Pct': '{:,.2%}',
        '90th Pct': '{:,.2%}',
    })
    return stats_styled


def plot_corr(freq, returns, lower_only=True):
    """
    Draw an annotated heatmap of return correlations for one frequency.

    Parameters
    ----------
    freq : str
        Key into `returns`; also used (capitalized) in the plot title.
    returns : dict
        Maps frequency label -> DataFrame of returns (one column per asset).
    lower_only : bool, default True
        If True, hide the cells strictly above the diagonal so only the
        lower triangle and the diagonal are shown. If False, show all cells.
    """
    cor = returns[freq].corr()
    if lower_only:
        # k=1 masks only the cells strictly above the diagonal,
        # so the diagonal itself stays visible
        mask = np.triu(np.ones(cor.shape, dtype=bool), k=1)
    else:
        # all-False boolean mask: nothing is hidden
        mask = np.zeros(cor.shape, dtype=bool)
    hm = sns.heatmap(cor, mask=mask, cmap='coolwarm', center=0, annot=True)
    hm.set_title(f'{freq.capitalize()} Return Correlations')

def plot_time_series(freq, returns):
    """
    Plot the compounded cumulative return of every asset as a line chart.

    `freq` selects the DataFrame inside the `returns` dict and is also used
    (capitalized) in the chart title.
    """
    growth = (returns[freq] + 1).cumprod()
    cumulative = growth - 1
    cumulative.plot.line(title=f'{freq.capitalize()} Cumulative Returns')

def plot_hist(freq, returns, alpha=0.5):
    """
    Overlay a 50-bin histogram of returns for every asset on a single axes.

    `freq` selects the DataFrame inside the `returns` dict and is also used
    (capitalized) in the title; `alpha` is the opacity of each histogram.
    """
    frame = returns[freq]
    _, ax = plt.subplots()

    # one semi-transparent histogram per asset, drawn in column order
    for name in frame.columns:
        ax.hist(frame[name], bins=50, alpha=alpha)

    # legend entries follow the same column order as the histograms
    ax.legend(frame.columns, loc='best', prop={'size':10}, handletextpad=0.2)

    # axis labels and title
    ax.set_xlabel("Return")
    ax.set_ylabel("Frequency")
    ax.set_title(f"{freq.capitalize()} Return Distribution")

    plt.show()

### Monthly Stats

In [None]:
# summary-statistics table (one row per ticker) for monthly returns
calc_stats('monthly', returns)

In [None]:
# full (unmasked) correlation heatmap of monthly returns
plot_corr('monthly', returns, lower_only=False)

In [None]:
# compounded cumulative monthly returns, one line per ticker
plot_time_series('monthly', returns)

In [None]:
# overlaid histograms of monthly returns, one per ticker
plot_hist('monthly', returns)

### Intro

In this exercise I discuss the asset preferences implied by different risk measurement techniques and their impact on asset allocation. A summary of investment preferences comes first, with supporting tables and graphs after that.

### Investment Preferences

The investable universe consists of IWB, IWM, EFA, EEM, VNQ, LQD, and SHY ETFs. 

#### Mean-Variance
Mean-variance optimization will prefer assets with a higher level of return per unit of risk and those that lower variance of the overall portfolio by having lower correlations with the other assets in the investment universe. Since higher moments (skewness/kurtosis) are not considered, these statistics will not affect the allocation. 
- Since defensive assets (SHY and LQD) have low or negative correlation to REITs and equities, these will have a significant allocation in the MV framework. I suspect SHY will get a higher allocation in the lower target return portfolios, and LQD's weight will increase as the target return increases.
- IWB has the highest return/risk ratio and low correlation to defensive asset classes. I suspect the MV approach will allocate a significant portion to this asset. Of course, the actual allocation will vary greatly between the different return targets for the portfolio.
- IWM, EFA, and EEM have middling return/risk ratios and correlations to other asset classes. Since MV allocation tends to pile into a couple of assets (assuming minimal constraints) and not utilize the full asset universe, I predict that there will not be a large allocation to these.
- I don't foresee the optimizer allocating significant capital to REITs; it has higher correlation to the defensive asset classes (corp. bonds and treasuries) than equities but a lower level of return per unit of risk.

#### Mean Absolute Deviation (MAD)

Mean-MAD optimization focuses on minimizing the average deviation of returns from the mean, which will emphasize downside protection. This means the mean-MAD optimization will prefer assets that have lower kurtosis (lighter tails) because there are fewer observations with a large distance from the mean. Additionally, since the distance from the mean is not squared (unlike mean-variance), mean-MAD will penalize more volatile assets less and prefer diversifying assets with different return patterns.
- LQD, VNQ have heavy tails based on kurtosis measures, so this optimization technique is unlikely to allocate a large amount to these assets.
- SHY will be preferred due to its diversifying benefit
- Among equities, IWM will likely be preferred over IWB due to slightly lower correlation with other assets and lower kurtosis. EFA has similar dispersion (10th and 90th percentiles) to IWB, and EEM is similar to IWM, but with lower average return and similar correlations. I don't see a significant allocation to these 2 asset classes.
   
#### Conditional Value at Risk (CVaR)

Mean-CVaR is similar to mean-MAD in the sense of minimizing deviation of returns, but it focuses specifically on the downside. So assets with heavy left tails will not be favored by this approach. To put it in terms of our data tables, negatively skewed assets will be penalized by this approach.
- LQD and VNQ have negative skewness and therefore a higher CVaR. These will not get a high allocation.
- IWB and EFA also have negative skewness and therefore will be penalized more by this approach.
- IWM, EEM, and SHY have positive skewness and the optimizer should favor these assets.

In [None]:
def solve_min_var(mean, cov, w, constraints, labels, solver=None):
    """
    Solve a minimum-variance portfolio problem and print the result.

    Minimizes the quadratic form of `w` with `cov` subject to `constraints`,
    then prints the solver status, the weights (in percent) and the
    portfolio risk and return (in percent). Nothing is returned.

    Parameters
    ----------
    mean : array-like
        Per-asset mean return; only used to report the portfolio return.
    cov : array-like
        Covariance matrix passed to `cp.quad_form`.
    w : cp.Variable
        Weight variable that `constraints` are expressed in.
    constraints : list
        cvxpy constraints for the problem.
    labels : list
        Index labels for the printed weight Series.
    solver : str, optional
        Name of the cvxpy solver to use. Defaults to `cp.ECOS`, the solver
        this helper has always used; pass another installed solver when
        ECOS is not available in the environment.

    Raises
    ------
    AssertionError
        If the solver status is anything other than optimal.
    """
    if solver is None:
        solver = cp.ECOS

    # minimize variance of portfolio
    obj = cp.Minimize(cp.quad_form(w, cov))

    prob = cp.Problem(
        objective=obj,
        constraints=constraints,
    )

    prob.solve(solver=solver)
    print(prob.status)
    # w.value is None unless a solution was found, so stop here with a
    # readable message instead of failing on the arithmetic below
    assert prob.status == cp.OPTIMAL, f'solver returned status {prob.status!r}'

    weights = w.value
    p_var = weights @ cov @ weights
    p_risk = np.sqrt(p_var)
    p_ret = weights @ mean
    print('\nPortfolio Weights:\n')
    print(pd.Series(index=labels, data=np.round(weights * 1e2, 2), name='Weight'))
    print(f'\nPortfolio risk: {round(p_risk * 1e2, 2)}%')
    print(f'Portfolio return: {round(p_ret * 1e2, 2)}%')

In [None]:
# minimum-variance portfolio estimated from yearly returns
freq = 'yearly'
rts = returns[freq]
mean, cov = rts.mean(), rts.cov()

# one weight per asset: this is the vector being solved for
w = cp.Variable(len(mean))

# 8% annual return target, scaled by the period length in trading days
min_period_return = (0.08 * (freqs[freq]/252))

constraints = [
    cp.sum(w) == 1,                 # fully invested
    w >= 0,                         # long-only
    w @ mean >= min_period_return,  # meet the return target
]

solve_min_var(mean.values, cov.values, w, constraints, labels=list(rts.columns))