# Complete Markowitz Analysis
This will complete the results under the following:

| n=4 $returns \sim \mathcal{N}(\mu,\sigma)$ Correlation from 0.8-0.9  | n=4 $returns \sim N(\mu,\sigma)$ Independent  | n=4 $returns \sim \mathcal{N}(\mu, sigma)$ Negative correlation (-0.8 to -0.5)  |
|----------------------------------------------------------------------|-----------------------------------------------|---------------------------------------------------------------------------------|
| n=8 $returns \sim \mathcal{N}(\mu,\sigma)$ Correlation from 0.8-0.9  | n=8 $returns \sim N(\mu,\sigma)$ Independent  | n=8 $returns \sim \mathcal{N}(\mu, sigma)$ Negative correlation (-0.8 to -0.5)  |
| n=16 $returns \sim \mathcal{N}(\mu,\sigma)$ Correlation from 0.8-0.9 | n=16 $returns \sim N(\mu,\sigma)$ Independent | n=16 $returns \sim \mathcal{N}(\mu, sigma)$ Negative correlation (-0.8 to -0.5) |

using the method laid out in 4.1, Markowitz portfolio optimization

## Read Packages

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import cvxopt as opt
from cvxopt import blas, solvers
import pandas as pd
from pylab import rcParams
from mosek import iparam
import time
import random
import pypfopt as pyp
import sklearn.covariance
solvers.options['show_progress'] = False

In [2]:
import plotly as py
import plotly.tools as tls
from plotly.graph_objs import *

## Simulate Data
Normal distribution with 0% correlation

Create function to generate random weights

In [3]:
def rand_weights(n):
    '''Produces n random weights that sum to 1'''
    k = np.random.rand(n)
    return k / sum(k)

Create function to generate random portfolio

In [4]:
def random_portfolio(returns):
    '''
    Returns the mean and standard deviation of returns for a randomly generated portfolio
    '''
    
    p = np.asmatrix(np.mean(returns, axis=1))
    w = np.asmatrix(rand_weights(returns.shape[0]))
    C = np.asmatrix(np.cov(returns))
    
    mu = w * p.T
    sigma = np.sqrt(w * C * w.T)
    
    # Test for outliers to make plot prettier
    if sigma > 2:
        return random_portfolio(returns)
    return mu, sigma

Generates $n$ random portfolios

In [5]:
n_portfolios = 500
means, stds = np.column_stack([
    random_portfolio(return_vec)
    for _ in range(n_portfolios)
])

NameError: name 'return_vec' is not defined

## Optimization

Create a series of functions that perform optimizations

In [210]:
def optimal_portfolio(returns):
    n = len(returns)
    returns = np.asmatrix(returns + 5.0)
    
    N = 1000
    mus = [(1/n) + t/N for t in range(N)]#[10**(5.0 * t/N - 1.0) for t in range(N)]
    
    # Convert to cvxopt matrices
    S = opt.matrix(np.cov(returns))
    pbar = opt.matrix(np.mean(returns, axis = 1))
    
    # Create constraint matrices
    G = -opt.matrix(np.eye(n)) # negative nxn identity matrix
    h = opt.matrix(0.0, (n,1))
    A = opt.matrix(1.0, (1,n))
    b = opt.matrix(1.0)
    
    # Calculate efficient frontier weights using quadratic programming
    portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x']
                  for mu in mus]
    ## Calculate risk and returns for frontier
    ret = [blas.dot(pbar, x) for x in portfolios]
    risks = [np.sqrt(blas.dot(x, S*x)) for x in portfolios]
    ## Calculate the 2nd degree polynomail of the frontier curve
    m1 = np.polyfit(ret, risks, 2)
    x1 = np.sqrt(m1[2] / m1[0])
    ## Calculate the optimal portfolio
    wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x']
    ret = np.zeros((n,1))
    rsk = np.zeros((n,1))
    for j in range(n):
        ret[j] = wt.T * np.mean(returns, axis = 1)
        rsk[j] = wt.T @ np.cov(returns) @ wt
    return np.asarray(wt), ret, rsk

In [200]:
# Does not work (?)
def optimal_portfolio(returns):
    n = len(returns)
    returns = np.asmatrix(returns + 5)
    
    N = 1000
    mus = [np.ones(N)*(1/n) + 5.0]#[10**(5.0 * t/n - 1.0) for t in range(N)]
    
    # Convert to cvxopt matrices
    S = opt.matrix(np.cov(returns.T))
    pbar = opt.matrix(np.mean(returns.T, axis = 1))
    
    # Create constraint matrices
    G = -opt.matrix(np.eye(n)) # negative nxn identity matrix
    h = opt.matrix(0.0, (n,1))
    A = opt.matrix(1.0, (1,n))
    b = opt.matrix(1.0)
    
    # Calculate efficient frontier weights using quadratic programming
    portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x']
                  for mu in mus]
    ## Calculate risk and returns for frontier
    returns = [blas.dot(pbar, x) for x in portfolios]
    risks = [np.sqrt(blas.dot(x, S*x)) for x in portfolios]
    ## Calculate the 2nd degree polynomail of the frontier curve
    m1 = np.polyfit(returns, risks, 2)
    x1 = np.sqrt(m1[2] / m1[0])
    ## Calculate the optimal portfolio
    wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x']
    ret = np.array([])
    rsk = np.array([])
    for j in range(n):
        ret[j] = 1/n * np.sum(wt[j] * np.asmatrix(returns)[:,j])
        rsk[j] = 1/N * np.sum(1/n * np.sum(np.asmatrix(risks)[:,j]))
    return np.asarray(wt), ret.T, risks

Create function that performs 1 optimization

In [183]:
def one_optimization(n_assets, n_obs):
    '''
    First, simulates portfolios then optimizes. 
    This does 30 replications
    '''
    weight_res = np.zeros((99,n_assets))
    return_res = np.zeros((99,n_assets))
    risks_res = np.zeros((99,n_assets))
    
    for i in range(99):
        n_portfolios = 500
        np.random.seed(i)
        return_vec = np.random.randn(n_assets, n_obs)# need to set the seeds here so others use the same values
        means, stds = np.column_stack([
            random_portfolio(return_vec)
            for _ in range(n_portfolios)
            ])
        weights, returns, risks = optimal_portfolio(return_vec)
        weight_res[i,:] = weights.T
        return_res[i,:] = returns.reshape(n_assets,)
        risks_res[i,:] = risks.reshape(n_assets,)
    return weight_res, return_res, risks_res

In [7]:
def one_optimization_zeros(n_assets, n_obs):
    '''
    This is exactly like one_optimization except it replaces 75% of asset 1 and 2 with zeros
    First, simulates portfolios then optimizes. 
    This does 30 replications
    '''
    weight_res = np.zeros((99,n_assets))
    return_res = np.zeros((99,n_assets))
    risks_res = np.zeros((999,n_assets))
    
    for i in range(99):
        n_portfolios = 500
        np.random.seed(i)
        return_vec = np.random.randn(n_assets, n_obs)# need to set the seeds here so others use the same values
        _75_perct = int(len(return_vec[:,0])*3/4)
        return_vec[0,random.sample(list(range(n_obs)), _75_perct)] = 0
        np.random.seed(i*5)
        return_vec[1,random.sample(list(range(n_obs)), _75_perct)] = 0
        
        means, stds = np.column_stack([
            random_portfolio(return_vec)
            for _ in range(n_portfolios)
            ])
        weights, returns, risks = optimal_portfolio(return_vec)
        weight_res[i,:] = weights.T
        return_res[i,:] = returns.reshape(n_assets,)
        risks_res[i,:] = risks.reshape(n_assets,)
    return weight_res, return_res, risks_res

## Perform the optimizations specified
## $n = 4$ 

In [211]:
start = time.time()
n4_normal_weight, n4_normal_return, n4_normal_risks = one_optimization(4, 1000)
end = time.time()
print(start - end)

-51.060664892196655


In [212]:
np.round(n4_normal_return.mean(axis=0),3)

array([5.001, 5.001, 5.001, 5.001])

In [213]:
np.round(n4_normal_return.std(axis=0), 3)

array([0.014, 0.014, 0.014, 0.014])

In [85]:
n4_normal_weight_os, n4_normal_return_os, n4_normal_risks_os = one_optimization_zeros(4, 1000)

In [86]:
np.round(n4_normal_return_os.mean(axis=0),3)

array([5.023, 5.002, 5.   , 5.   ])

In [87]:
np.round(n4_normal_return_os.std(axis=0),3)

array([0.02 , 0.014, 0.014, 0.014])

## $n = 8$ 

In [90]:
n8_normal_weight, n8_normal_return, n8_normal_risks = one_optimization(8, 1000)

In [91]:
np.round(n8_normal_return.mean(axis=0),3)

array([5.034, 5.016, 5.004, 5.001, 5.   , 5.   , 5.   , 5.   ])

In [93]:
np.round(n8_normal_return.std(axis = 0), 3)

array([0.017, 0.013, 0.011, 0.011, 0.011, 0.011, 0.011, 0.011])

In [94]:
n8_normal_weight_os, n8_normal_return_os, n8_normal_risks_os = one_optimization_zeros(8, 1000)

In [95]:
np.round(n8_normal_return_os.mean(axis=0),3)

array([5.034, 5.016, 5.004, 5.001, 5.   , 5.   , 5.   , 5.   ])

In [96]:
np.round(n8_normal_return_os.std(axis=0),3)

array([0.017, 0.013, 0.011, 0.011, 0.011, 0.011, 0.011, 0.011])

## $n=16$

In [97]:
n16_normal_weight, n16_normal_return, n16_normal_risks = one_optimization(16, 1000)

In [98]:
np.round(n16_normal_return.mean(axis=0), 3)

array([5.043, 5.036, 5.027, 5.017, 5.008, 5.004, 5.002, 5.001, 5.   ,
       5.   , 5.   , 5.   , 5.   , 5.   , 5.   , 5.   ])

In [99]:
np.round(n16_normal_return.std(axis=0),3)

array([0.014, 0.013, 0.011, 0.01 , 0.009, 0.008, 0.008, 0.008, 0.008,
       0.008, 0.008, 0.008, 0.008, 0.008, 0.008, 0.008])

In [100]:
n16_normal_weight_os, n16_normal_return_os, n16_normal_risks_os = one_optimization_zeros(16,1000)

In [101]:
np.round(n16_normal_return_os.mean(axis=0),3)

array([5.043, 5.036, 5.027, 5.017, 5.008, 5.004, 5.002, 5.001, 5.   ,
       5.   , 5.   , 5.   , 5.   , 5.   , 5.   , 5.   ])

In [102]:
np.round(n16_normal_return_os.std(axis=0), 3)

array([0.014, 0.013, 0.011, 0.01 , 0.009, 0.008, 0.008, 0.008, 0.008,
       0.008, 0.008, 0.008, 0.008, 0.008, 0.008, 0.008])

## Play with Correlation

## Functions

In [53]:
def cor_optimization(n_assets, n_obs, r):
    '''
    First, simulates portfolios then optimizes. 
    This does 30 replications
    n_assets: number of assets
    n_obs: number of observations
    r: covariance matrix
    '''
    weight_res = np.zeros((99,n_assets))
    return_res = np.zeros((99,n_assets))
    risks_res = np.zeros((99,n_assets))
    
    for i in range(99):
        n_portfolios = 500
        np.random.seed(i)
        
        rng = np.random.default_rng()
        return_vec = rng.multivariate_normal(np.zeros(n_assets), cov = r, size = n_obs).T
        
        means, stds = np.column_stack([
            random_portfolio(return_vec)
            for _ in range(n_portfolios)
            ])
        weights, returns, risks = optimal_portfolio(return_vec)
        weight_res[i,:] = weights.T
        return_res[i,:] = np.array(returns).T
        risks_res[i,:] = np.array(risks).T
    return weight_res, return_res, risks_res

In [54]:
def cor_optimization_zeros(n_assets, n_obs, r):
    '''
    First, simulates portfolios then optimizes. 
    This does 30 replications
    n_assets: number of assets
    n_obs: number of observations
    r: covariance matrix
    '''
    weight_res = np.zeros((99,n_assets))
    return_res = np.zeros((99,n_assets))
    risks_res = np.zeros((99,n_assets))
    
    for i in range(99):
        n_portfolios = 500
        np.random.seed(i)
        
        rng = np.random.default_rng()
        return_vec = rng.multivariate_normal(np.zeros(n_assets), cov = r, size = n_obs).T
        _75_perct = int(len(return_vec[:,0])*3/4)
        return_vec[0,random.sample(list(range(1000)), _75_perct)] = 0
        np.random.seed(i*5)
        return_vec[1,random.sample(list(range(1000)), _75_perct)] = 0
        
        means, stds = np.column_stack([
            random_portfolio(return_vec)
            for _ in range(n_portfolios)
            ])
        weights, returns, risks = optimal_portfolio(return_vec)
        weight_res[i,:] = weights.T
        return_res[i,:] = np.array(returns).T
        risks_res[i,:] = np.array(risks).T
    return weight_res, return_res, risks_res

## Results for Correlation between 0.8-0.9

## $n=4$

In [55]:
n = 4
r = np.random.uniform(0.8, 0.9, (n,n))
np.fill_diagonal(r, 1)

In [56]:
n4_corr_weight, n4_corr_return, n4_corr_risks = cor_optimization(4, 1000, r)


covariance is not positive-semidefinite.



In [57]:
np.round(n4_corr_return.mean(axis=0),3)

array([5.011, 5.   , 4.998, 4.998])

In [58]:
np.round(n4_corr_return.std(axis=0), 3)

array([0.027, 0.026, 0.026, 0.026])

In [60]:
n4_corr_weight_os, n4_corr_return_os, n4_corr_risk_os = cor_optimization_zeros(4, 1000, r)


covariance is not positive-semidefinite.



In [61]:
np.round(n4_corr_return_os.mean(axis = 0), 3)

array([5.013, 5.003, 5.002, 5.002])

In [62]:
np.round(n4_corr_return_os.std(axis = 0), 3)

array([0.028, 0.027, 0.027, 0.027])

## $n = 8$

In [63]:
n = 8
r = np.random.uniform(0.8, 0.9, (n,n))
np.fill_diagonal(r, 1)

In [64]:
n8_corr_weight, n8_corr_return, n8_corr_risks = cor_optimization(8, 1000, r)


covariance is not positive-semidefinite.



zeros

In [65]:
np.round(n8_corr_return.mean(axis = 0), 3)

array([5.019, 5.012, 5.005, 5.003, 5.002, 5.002, 5.002, 5.002])

In [66]:
np.round(n8_corr_return.std(axis = 0), 3)

array([0.032, 0.032, 0.031, 0.031, 0.031, 0.031, 0.031, 0.031])

In [67]:
n8_corr_weight_os, n8_corr_return_os, n8_corr_risks_os = cor_optimization_zeros(8, 1000, r)


covariance is not positive-semidefinite.



In [72]:
np.round(n8_corr_return_os.mean(axis = 0),3)

array([5.022, 5.015, 5.008, 5.006, 5.005, 5.005, 5.005, 5.005])

In [73]:
np.round(n8_corr_return_os.std(axis=0), 3)

array([0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03])

## $n=16$

In [74]:
n = 16
r = np.random.uniform(0.8, 0.9, (n,n))
np.fill_diagonal(r, 1)

In [75]:
n16_corr_weight, n16_corr_return, n16__corr_risks = cor_optimization(16,1000,r)


covariance is not positive-semidefinite.



In [76]:
np.round(n16_corr_return.mean(axis=0),3)

array([5.027, 5.024, 5.021, 5.016, 5.012, 5.009, 5.008, 5.007, 5.007,
       5.006, 5.006, 5.006, 5.006, 5.006, 5.006, 5.006])

In [77]:
np.round(n16_corr_return.std(axis=0), 3)

array([0.03 , 0.03 , 0.029, 0.029, 0.029, 0.028, 0.028, 0.028, 0.028,
       0.028, 0.028, 0.028, 0.028, 0.028, 0.028, 0.028])

In [78]:
n16_corr_weight_os, n16_corr_return_os, n16_corr_risks_os = cor_optimization_zeros(16, 1000, r)


covariance is not positive-semidefinite.



In [79]:
np.round(n16_corr_return_os.mean(axis = 0), 3)

array([5.023, 5.02 , 5.016, 5.011, 5.007, 5.004, 5.002, 5.002, 5.001,
       5.001, 5.001, 5.001, 5.001, 5.001, 5.001, 5.001])

In [80]:
np.round(n16_corr_return_os.std(axis = 0), 3)

array([0.031, 0.03 , 0.03 , 0.03 , 0.03 , 0.03 , 0.03 , 0.03 , 0.03 ,
       0.03 , 0.03 , 0.03 , 0.03 , 0.03 , 0.03 , 0.03 ])

## Results for Correlation between -0.6 and -0.5

## $n=4$

In [103]:
n = 4
r = np.random.uniform(-0.8, -0.5, (n,n))
np.fill_diagonal(r, 1)

In [104]:
n4_neg_weight, n4_neg_return, n4_neg_risks = cor_optimization(4, 1000, r)


covariance is not positive-semidefinite.



In [105]:
np.round(n4_neg_return.mean(axis = 0),3)

array([5.023, 5.001, 5.   , 4.999])

In [106]:
np.round(n4_neg_return.std(axis=0),3)

array([0.022, 0.016, 0.016, 0.016])

In [110]:
n4_neg_weight_os, n4_neg_return_os, n4_neg_risks_os = cor_optimization_zeros(4, 1000, r)


covariance is not positive-semidefinite.



In [111]:
np.round(n4_neg_return_os.mean(axis = 0), 3)

array([5.024, 5.001, 4.999, 4.999])

In [112]:
np.round(n4_neg_return_os.std(axis = 0), 3)

array([0.022, 0.016, 0.016, 0.016])

## $n=8$

In [113]:
n = 8
r = np.random.uniform(-0.8, -0.5, (n,n))
np.fill_diagonal(r, 1)

In [114]:
n8_neg_weight, n8_neg_return, n8_neg_risks = cor_optimization(8, 1000, r)


covariance is not positive-semidefinite.



In [115]:
np.round(n8_neg_return.mean(axis=0), 3)

array([5.036, 5.014, 5.002, 4.999, 4.999, 4.998, 4.998, 4.998])

In [116]:
np.round(n8_neg_return.std(axis=0), 3)

array([0.028, 0.023, 0.021, 0.021, 0.021, 0.021, 0.02 , 0.02 ])

In [118]:
n8_neg_weight_os, n8_neg_return_os, n8_neg_risks_os = cor_optimization_zeros(8, 1000, r)


covariance is not positive-semidefinite.



In [119]:
np.round(n8_neg_return_os.mean(axis=0),3)

array([5.042, 5.017, 5.004, 5.   , 4.999, 4.999, 4.999, 4.999])

In [120]:
np.round(n8_neg_return_os.std(axis=0),3)

array([0.026, 0.02 , 0.019, 0.019, 0.019, 0.019, 0.019, 0.019])

## $n=16$

In [121]:
n = 16
r = np.random.uniform(-0.8, -0.5, (n,n))
np.fill_diagonal(r, 1)

In [122]:
n16_neg_weight, n16_neg_return, n16_neg_risks = cor_optimization(16, 1000, r)


covariance is not positive-semidefinite.



In [123]:
np.round(n16_neg_return.mean(axis=0), 3)

array([5.054, 5.043, 5.029, 5.016, 5.007, 5.003, 5.   , 4.999, 4.999,
       4.999, 4.998, 4.998, 4.998, 4.998, 4.998, 4.998])

In [124]:
np.round(n16_neg_return.std(axis=0), 3)

array([0.026, 0.025, 0.024, 0.022, 0.022, 0.021, 0.021, 0.021, 0.021,
       0.021, 0.021, 0.021, 0.021, 0.021, 0.021, 0.021])

In [125]:
n16_neg_weight_os, n16_neg_return_os, n16_neg_risks_os = cor_optimization_zeros(16, 1000, r)


covariance is not positive-semidefinite.



In [127]:
np.round(n16_neg_return_os.mean(axis=0), 3)

array([5.054, 5.042, 5.03 , 5.018, 5.01 , 5.005, 5.003, 5.002, 5.002,
       5.002, 5.002, 5.002, 5.002, 5.002, 5.002, 5.002])

In [126]:
np.round(n16_neg_return_os.std(axis=0), 3)

array([0.028, 0.027, 0.026, 0.026, 0.026, 0.026, 0.026, 0.026, 0.026,
       0.026, 0.026, 0.026, 0.026, 0.026, 0.026, 0.026])

In [430]:
returns = np.random.randn(n_assets, n_obs)
n = len(returns)
returns = np.asmatrix(returns + 5)
    
S = np.array(np.cov(returns))
pbar = np.array(np.mean(returns, axis = 1))
    
ef = pyp.EfficientFrontier(pbar, S)
ef.add_constraint(lambda w: sum(w) == 1)
ef.efficient_return(0.25)

Error: rescode.err_missing_license_file(1008): License cannot be located. The default search path is ':/Users/elizabeth/mosek/mosek.lic:'.

In [134]:
np.array(return_vec.mean(axis = 1))

array([ 3.48258000e-01,  2.74181231e-01,  1.18999905e-02,  6.59073496e-01,
       -2.39475494e-02, -4.85130816e-01, -4.26386162e-01,  3.58909190e-01,
        5.45860521e-01,  1.87972789e+00,  6.60204897e-01,  1.81526114e-01,
       -5.72606307e-02,  7.18939831e-01,  1.87051847e-01, -2.36209022e-01,
       -3.00449480e-01,  8.70202967e-02,  1.37178348e+00, -5.81478325e-02,
        2.37535583e-01,  1.41579003e+00,  1.30727715e+00, -6.11826576e-01,
       -3.65325717e-01, -8.72504438e-01,  8.61429637e-01, -3.00372058e-01,
        3.49067239e-02, -9.57343275e-01,  1.17292762e-01,  5.35940309e-01,
       -1.03589279e+00,  8.92674833e-01, -4.40629358e-01, -5.65390805e-01,
        9.87161738e-01, -4.42075408e-01, -1.14756243e-01, -3.27362654e-01,
       -5.84108205e-02, -3.39745207e-02,  4.35377231e-01,  4.06290120e-01,
       -2.30709381e-01, -3.68330344e-01,  6.23549274e-01, -8.52644424e-01,
        4.88621269e-02, -1.59690378e+00, -4.31735524e-01,  2.84774104e-01,
        2.35070727e-01, -