In [13]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

## Bootstrapping exercise
- There are two correlated stocks, $X$ and $Y$, which are normally distributed
- You will invest in both stocks with all your money
- Your goal is to find $\alpha \in [0,1]$ (the portion of your money invested in stock $X$) to minimize the variance of the portfolio, i.e.,
 $$ \mathbb{V}\mathrm{AR}[\alpha X + (1-\alpha)Y] $$
- You are given "returns.npy", a 2-D numpy array of the past $200$ returns of $(X,Y)$.

Exercise questions: 
1. Estimate $\mu_X, \mu_Y, \sigma_{X}^2, \sigma_{Y}^2, \sigma_{XY}$ from the sample, and then compute the optimal investment strategy $\alpha$
2. Since we estimated the parameters from a single sample, our estimate may be severely affected by sampling error. Hence, use bootstrapping: repeatedly draw $200$ points (with replacement) from the previous returns and estimate the optimal investment strategy on each resample. Repeat this $B = 500$ times and use the resulting estimates to compute the standard error of the optimal $\alpha$ that we computed on the original set of previous returns.

In [14]:
# Load the historical returns; later cells index this as one row per
# observation with two columns (X, Y).
returns = np.load("returns.npy")
# Derive the sample size from the loaded array instead of hard-coding 200, so
# every later cell that uses `n` stays consistent with the data actually read.
n = returns.shape[0]

In [15]:
# Estimate mean returns: average over the observation axis (axis 0), giving
# one mean per column of `returns`.
hat_mean = returns.mean(axis=0)
print(hat_mean)

[2.97259589 2.98935547]


In [16]:
#Estimate covariance matrix
# np.cov with rowvar=False treats every row of `returns` as one observation and
# every column as one variable, and divides by (number of rows - 1) by default.
# This replaces the explicit per-row outer-product loop and no longer depends on
# a separately maintained sample-size variable.
hat_cov = np.cov(returns, rowvar=False)
hat_varx = hat_cov[0,0]   # sample variance of the first column (X)
hat_vary = hat_cov[1,1]   # sample variance of the second column (Y)
hat_covar = hat_cov[0,1]  # sample covariance between the two columns
print(f'hat_cov = {hat_cov}; hat_varx = {hat_varx}; hat_vary = {hat_vary};hat_covar= {hat_covar } ')

hat_cov = [[1.29126685 0.46509466]
 [0.46509466 1.10756609]]; hat_varx = 1.291266848642483; hat_vary = 1.1075660892225494;hat_covar= 0.4650946600840741 


In [17]:
#estimate optimal investment
def optimal_alpha(varx, vary, covar):
    """Return the weight on X that minimises Var[alpha*X + (1-alpha)*Y].

    Uses the closed form
        alpha = (vary - covar) / (varx + vary - 2*covar).

    Parameters
    ----------
    varx : variance of X.
    vary : variance of Y.
    covar : covariance between X and Y.

    Notes
    -----
    The value is the unconstrained stationary point: it is not clipped to
    [0, 1], and there is no guard for a zero denominator
    (varx + vary - 2*covar == 0).
    """
    numerator = float(vary - covar)
    denominator = varx + vary - 2*covar
    return numerator/denominator
# Plug the sample estimates into the closed-form solution and display the
# result rounded to three decimals.
optimal_investment = optimal_alpha(varx=hat_varx, vary=hat_vary, covar=hat_covar)
round(optimal_investment, 3)

0.437

In [18]:
def hat_alpha(sample_returns):
    """Estimate the minimum-variance weight on X from a sample of returns.

    Computes the sample covariance matrix of the two columns (divisor
    ``n - 1``) and passes the two variances and the covariance to
    ``optimal_alpha``.

    Parameters
    ----------
    sample_returns : array-like of shape (n, 2)
        One row per observation; column 0 is X, column 1 is Y.

    Returns
    -------
    The value returned by ``optimal_alpha`` for the sample estimates.

    Raises
    ------
    ValueError
        If ``sample_returns`` is not two-dimensional with exactly two columns.
    """
    observations = np.asarray(sample_returns)
    # Explicit shape check: np.cov would accept any number of columns, whereas
    # this estimator is only defined for the two-asset case.
    if observations.ndim != 2 or observations.shape[1] != 2:
        raise ValueError(
            f"sample_returns must have shape (n, 2), got {observations.shape}"
        )
    # rowvar=False: rows are observations, columns are variables; np.cov's
    # default normalisation divides by (n - 1).
    sample_cov = np.cov(observations, rowvar=False)
    return optimal_alpha(sample_cov[0, 0], sample_cov[1, 1], sample_cov[0, 1])

In [19]:
simulation = 500  # number of bootstrap replications
B = simulation
samples = n  # each resample has the same size as the original sample
# Fixed seed so that re-running the notebook reproduces the same resamples and
# therefore the same bootstrap estimates.
BOOTSTRAP_SEED = 0
rng = np.random.default_rng(BOOTSTRAP_SEED)
estimations = np.zeros(simulation)  # one estimated alpha per replication
for sim in range(simulation):
    # Draw row indices uniformly with replacement from the observed returns.
    resample_idx = rng.integers(returns.shape[0], size=samples)
    generated_sample = returns[resample_idx, :]
    estimations[sim] = hat_alpha(generated_sample)
print("mean", round(np.mean(estimations),3),"min",np.min(estimations), "max", np.max(estimations))

mean 0.436 min 0.2803160703642013 max 0.5704967499471215


In [21]:
#compute standard error
# Sample standard deviation of the bootstrap estimates around their own mean,
# using the divisor B-1.
centred = estimations - estimations.mean()
sum_of_squares = (centred * centred).sum()
bootstrap_error = np.sqrt(sum_of_squares / (B-1))
np.round(bootstrap_error,3)

0.055

In [91]:
# Final answer, part 1: the estimated optimal share of wealth invested in X
# (computed earlier from the full sample), rounded to three decimals.
round(optimal_investment,3)

0.437

In [92]:
# Final answer, part 2: the bootstrap standard error of that estimate
# (computed earlier from the bootstrap estimates), rounded to three decimals.
round(bootstrap_error,3)

0.056