## Portfolio Optimization - Asset Management
#### Arno Goedhuys /  r0636556 / 29 juni 2017

In [None]:
import numpy as np
import pandas as pd
from pandas_datareader import data as wb
import matplotlib.pyplot as plt
from scipy import optimize
from scipy.stats import norm
from scipy import interpolate
from cvxopt import matrix
from cvxopt.blas import dot
from cvxopt.solvers import qp, options
%matplotlib inline

#### Downloading the data
The data is downloaded from Google Finance. 
<p> The time interval runs from the first of January 2009 to the current date.

In [None]:
# Ticker symbols of the initial asset universe; later cells extend this list
# in place through add_stock().
assets = ['FOX','HAS','DISCA','MAR','NVDA','VOD','SBUX','GOOGL','ADBE','TRV','UNH','VZ','WMT','GS','DD','AXP','AAPL','MSFT','AMZN','YHOO','KO','CSCO','JPM','UTX','V','MCD','DIS', 'NKE','INTC','GE', 'PG', 'JNJ','HD','IBM','MMM']
# One column of closing prices per ticker.
pf_data = pd.DataFrame()

# Each ticker is fetched separately through pandas_datareader, starting at
# 2009-01-01, and only the 'Close' column is kept.
# NOTE(review): confirm that the installed pandas_datareader still supports
# the 'google' data source.
for asset in assets:
    pf_data[asset] = wb.DataReader(asset, data_source = 'google', start='2009-1-1')['Close']

In [None]:
def add_stock(list_of_new_stocks):
    """Download closing prices for extra tickers and add them to the portfolio.

    For every ticker in ``list_of_new_stocks`` the 'Close' series since
    2009-01-01 is fetched and stored as a column of the module-level
    ``pf_data``; the ticker is also recorded in the module-level ``assets``
    list.

    Returns the (mutated) ``pf_data`` DataFrame.
    """
    for asset in list_of_new_stocks:
        # Download first, so a failed request does not leave a ticker in
        # `assets` that has no matching column in `pf_data`.
        closing_prices = wb.DataReader(asset, data_source='google', start='2009-1-1')['Close']
        pf_data[asset] = closing_prices
        # Re-running a notebook cell overwrites the column but must not
        # register the ticker twice, otherwise len(assets) no longer matches
        # len(pf_data.columns).
        if asset not in assets:
            assets.append(asset)
    return pf_data
pf_data = add_stock(['CHKP','CA','COST','BIDU'])

In [None]:
# Extend the asset universe with a further batch of tickers.
pf_data = add_stock(['DLTR','EA','EBAY','EXPE','FAST','HOLX','GILD','TSCO','TXN','STX'])

In [None]:
# Extend the asset universe with two more tickers.
pf_data = add_stock(['ULTA','XRAY'])

In [None]:
# Number of assets = number of price columns downloaded so far.
amount_of_assets = len(pf_data.columns)
print 'amount of assets: ', amount_of_assets
# Number of rows (dates) in the price table; a later cell overwrites this
# name with the size of the sampled return set.
amount_of_trading_days = len(pf_data.index)
# Last expression of the cell: shows the first rows of the price table.
pf_data.head()

#### Normalize data
Divide each value of each asset by its initial value and multiply by 100 so that each asset starts with a value of 100. <p> Plot the evolution of each asset.

In [None]:
# Weights of the minimal-risk portfolio: convex_optimizer_weights is called
# with 0 as its third argument and the result is passed through
# convert_weights (both helpers are defined elsewhere in the notebook).
# NOTE(review): the third argument is presumably a target return, as in
# Efficient_Frontier - confirm against convex_optimizer_weights.
minimal_risk_weights = convert_weights(convex_optimizer_weights(Covariance_matrix, daily_returns, 0))
# Expected annual return of that portfolio.
return_minimal_risk = portfolio_return(daily_returns, minimal_risk_weights)
# Python 2 print statement; the trailing comma suppresses the newline.
print return_minimal_risk, #minimal_risk_weights

#### The optimizations
The optimization for the minimal-risk portfolio. The extra constraint that each weight lies between 0 and 0.5 is set through the 
`bounds` argument. The optimization used is sequential least squares programming (SLSQP). The PyOpt site describes it as:
> SLSQP optimizer is a sequential least squares programming algorithm which uses the Han–Powell quasi–Newton method with a BFGS update of the B–matrix and an L1–test function in the step–length algorithm. The optimizer uses a slightly modified version of Lawson and Hanson’s NNLS nonlinear least-squares solver. 
>
> [PyOpt](http://www.pyopt.org/reference/optimizers.slsqp.html)

The optimization for the maximal Sharpe ratio portfolio. The same algorithm is used as for the minimal-risk portfolio.

In [None]:
# Minimise sharp() (the negated Sharpe ratio) with SLSQP, starting from equal
# weights, subject to constraint1 (weights sum to one) and the per-weight
# bounds produced by gen_bounds (defined elsewhere in the notebook).
maximal_sharp_weights = optimize.fmin_slsqp(sharp, amount_of_assets*[1./float(amount_of_assets),],eqcons=[constraint1],
                     bounds=gen_bounds(amount_of_assets))
# Expected annual return and volatility of the resulting portfolio.
return_maximal_sharp = portfolio_return(daily_returns, maximal_sharp_weights)
risk_maximal_sharp = portfolio_volatility(Covariance_matrix, np.array(maximal_sharp_weights))
# Python 2 print statement; the trailing comma suppresses the newline.
print return_maximal_sharp, risk_maximal_sharp,# maximal_sharp_weights, 
# Grouped bar chart: one pair of bars per asset, the second bar shifted by
# the bar width (0.2) so both portfolios are visible side by side.
plt.figure(figsize=(15,5))
ax = plt.subplot(111)
indeces = [x for x in range(len(assets))]
indeces2 = [x+0.2 for x in range(len(assets))]
ax.bar(indeces, maximal_sharp_weights,0.2)
ax.bar(indeces2, minimal_risk_weights,0.2)
ax.set_xticks(indeces)
ax.set_xticklabels(assets)
ax.set_ylabel("Weight")
ax.set_xlabel("Asset")
ax.set_title('Weight of each asset in optimal portfolios')
# Rotate the ticker labels so they do not overlap.
plt.setp(plt.xticks()[1], rotation=90)
plt.legend(['Maximal Sharp Weights', 'Minimal Risk Weights'], loc='upper left')
plt.show()

In [None]:
def Efficient_Frontier(C_matrix, D_returns, amount_of_points, plot=True):
    """Sample the efficient frontier and return it as an interpolating function.

    ``amount_of_points`` target returns are generated, starting at the return
    of the minimal-risk portfolio and increasing in steps of
    ``(max_return(D_returns) - minimal_risk_return - 0.03) / amount_of_points``.
    For each target the weights come from ``convex_optimizer_weights`` and
    ``convert_weights`` (defined elsewhere in the notebook); the resulting
    (volatility, return) pairs are joined by a cubic interpolation.

    Parameters:
        C_matrix: covariance matrix passed to the optimizer and to
            portfolio_volatility.
        D_returns: DataFrame of returns, one column per asset.
        amount_of_points: number of target returns to sample.
        plot: when True, draw the interpolated frontier on the current axes.

    Returns:
        The scipy ``interp1d`` object mapping volatility to return.
    """
    minimal_risk_return = portfolio_return(
        D_returns, np.array(convert_weights(convex_optimizer_weights(C_matrix, D_returns, 0))))
    # Loop-invariant: computed once instead of re-averaging D_returns for
    # every sampled point.
    # NOTE(review): the 0.03 offset presumably keeps the targets below the
    # highest single-asset return so the optimizer stays feasible - confirm.
    return_span = max_return(D_returns) - minimal_risk_return - 0.03

    opt_returns = []
    opt_risks = []
    for i in range(amount_of_points):
        target_return = minimal_risk_return + i * return_span / amount_of_points
        optimum_weights = convert_weights(convex_optimizer_weights(C_matrix, D_returns, target_return))
        opt_returns.append(portfolio_return(D_returns, optimum_weights))
        opt_risks.append(portfolio_volatility(C_matrix, np.array(optimum_weights)))

    f = interpolate.interp1d(opt_risks, opt_returns, kind='cubic')
    # Evaluate the interpolation on an even volatility grid that stops at the
    # second-to-last sampled risk.
    xnew = np.arange(opt_risks[0], opt_risks[-2], (opt_risks[-2] - opt_risks[0]) / amount_of_points)
    ynew = f(xnew)
    # NOTE(review): title and labels are applied to the current axes even
    # when plot is False; kept as-is for callers that rely on it.
    plt.title("Efficient Frontier")
    plt.xlabel('Expected Volatility')
    plt.ylabel('Expected Return')
    if plot:
        plt.plot(xnew, ynew, 'k', linewidth=2.0)
    return f
Efficient_Frontier(Covariance_matrix, daily_returns, 400)

In [None]:
# Same frontier, computed on the returns that include the risk_free column.
Efficient_Frontier(Rf_Covariance_matrix, new_daily_returns, 400)

#### The results
The two optimized portfolios are plotted on a graph with the expected risk on the x axis and the expected return on the y axis. To illustrate that these are valid values, 25000 other portfolios with random weights are also plotted on the same graph. Out of the 25000 random portfolios, the one with the minimal risk is selected and returned to verify against the calculated weights; the same is done for the maximal Sharpe ratio. 

In [None]:
def example_plot(D_returns):
    """Draw both efficient frontiers and mark the two optimised portfolios.

    The frontier for the returns including the risk-free column is drawn
    first, then the frontier for ``D_returns``. The minimal-risk portfolio is
    marked in green and the maximal-Sharpe portfolio in red; both rely on the
    notebook globals ``minimal_risk_weights``, ``risk_maximal_sharp`` and
    ``return_maximal_sharp``. Returns None.
    """
    plt.figure(figsize=(12, 5))
    Efficient_Frontier(Rf_Covariance_matrix, new_daily_returns, 400)
    Efficient_Frontier(Covariance_matrix, D_returns, 400)
    plt.xlabel('Expected Volatility')
    plt.ylabel('Expected Return')

    # Minimal-risk portfolio (green marker).
    plt.scatter(
        portfolio_volatility(Covariance_matrix, np.array(minimal_risk_weights)),
        portfolio_return(D_returns, minimal_risk_weights),
        marker=(5, 1, 0), color='g', s=200)

    # Maximal-Sharpe portfolio (red marker), using the values computed earlier.
    plt.scatter(risk_maximal_sharp, return_maximal_sharp,
                marker=(5, 1, 0), color='r', s=200)

    plt.title("Minimal Risk and Maximal Sharp Ratio")
    # Coloured rectangles serve as proxy handles for the legend entries.
    legend_handles = tuple(
        plt.Rectangle((0, 0), 0.1, 0.1, fc=colour) for colour in ('g', 'r'))
    plt.legend(legend_handles, ('Minimal Risk Portfolio', 'Maximal Sharp Ratio'), loc='best')
    plt.show()
example_plot(daily_returns)

In [None]:
# Value of each optimised portfolio over time: the normalised prices
# weighted by the portfolio weights.
portfolio_performance_min_risk = normalized_data.dot(minimal_risk_weights)
portfolio_performance_max_sharp = normalized_data.dot(maximal_sharp_weights)
# Both series are drawn with the same figure size; the legend order matches
# the plotting order.
(portfolio_performance_min_risk).plot(figsize=(15,8))
(portfolio_performance_max_sharp).plot(figsize=(15,8))
plt.legend(('Minimal Risk Portfolio', 'Maximal Sharp Ratio'), loc='best')
plt.ylabel("Value")
plt.title("Portfolio value over time")

### Different risk parameters


#### 1. VaR (Value at Risk)

Value at Risk (VaR) describes the worst loss at a given confidence level $ \alpha $ over a given time period $t$. VaR can be calculated in two ways: by taking the largest return within the $\alpha$ % interval of worst returns, or by assuming the returns are normally distributed and deriving the VaR from their mean and standard deviation.

First some functions that will be useful later are given.

In [None]:
def portfolio_daily_returns(assets, weights):
    """Return the simple per-period returns of a weighted portfolio.

    Parameters:
        assets: DataFrame of prices, one column per asset, rows in
            chronological order.
        weights: one weight per column of ``assets``.

    Returns:
        Series of portfolio returns, one per row of ``assets`` except the
        first (which has no predecessor).
    """
    # Simple return r_t = P_t / P_{t-1} - 1, the same convention the notebook
    # uses for daily_returns. The previous (P_{t-1} - P_t) / P_t had the
    # opposite sign and the wrong denominator.
    returns = assets / assets.shift(1) - 1
    portfolio_returns = returns.dot(weights)
    # The first row is NaN because there is no earlier price; slicing also
    # copes with an empty input.
    return portfolio_returns.iloc[1:]
# Preview: first portfolio returns for a random weight vector.
portfolio_daily_returns(normalized_data, gen_random_weights(amount_of_assets)).head()

First the VaR will be calculated by selecting the largest return in the lowest interval.

In [None]:
# Rebase every asset so that its first row equals 100.
normalized_data = pf_data / pf_data.iloc[0] * 100
(normalized_data).plot(figsize=(15,8))
# Legend placed above the axes, spread over 12 columns.
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=12, mode="expand", borderaxespad=1.5)
plt.ylabel("Price")
plt.title("Asset prices over time")

#### Convert prices to returns and sample the returns
The returns will be modelled as simple returns: $$ r_i = \frac{P_{i+1}-P_i}{P_i} = \frac{P_{i+1}}{P_i} -1 $$

In [None]:
# Sampling weights 0, 1, 2, ...: later rows are more likely to be drawn, and
# the first row (weight 0, which holds the NaN return) is never drawn.
probabilities = [x for x in range(len(normalized_data))]
# Simple returns: P_t / P_{t-1} - 1.
daily_returns1 = normalized_data / normalized_data.shift(1) -1
# Draw a quarter of the rows, without replacement by default.
# NOTE(review): no random_state is passed, so every run yields a different
# sample and therefore different downstream results.
daily_returns2 = daily_returns1.sample(frac=0.25,weights = probabilities)
# Overwrites the earlier price-row count with the size of the sample.
amount_of_trading_days = len(daily_returns2.index)
# Restore chronological order after sampling.
daily_returns = daily_returns2.sort_index(ascending = True)
print len(daily_returns) #daily_returns.tail(), len(daily_returns), daily_returns1.tail(20)
(daily_returns).plot(figsize=(15,5))
# Legend placed above the axes, spread over 12 columns.
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=12, mode="expand", borderaxespad=1.5)
plt.ylabel("Daily return")
plt.title("Daily returns over time")

### Risk free rate

The risk free rate is taken as the 10 year yield of the US treasury bond. Current yield : [Bloomberg Markets](https://www.bloomberg.com/markets/rates-bonds/government-bonds/us)

In [None]:
risk_free_rate = 0.0232 # current 10 year yield on US treasury bond
# Constant per-trading-day risk-free return (252 trading days per year), one
# entry per row of daily_returns. Derived from risk_free_rate so the two
# values cannot drift apart when the yield is updated.
risk_free_return = np.full(len(daily_returns), risk_free_rate / 252.0)
# Same returns table with the risk-free asset appended as an extra column.
new_daily_returns = daily_returns.assign(risk_free=risk_free_return)
new_daily_returns.head()

#### Calculating the covariance matrix

In [None]:
# Covariance of the sampled returns, scaled by 252 (the trading-day count
# used throughout the notebook to annualise).
Covariance_matrix = daily_returns.cov() * 252
# Same, for the returns table that includes the risk_free column.
Rf_Covariance_matrix = new_daily_returns.cov() * 252
#print Covariance_matrix

#### Calculate the expected annual return and expected annual risk of a portfolio
A function to generate a given amount of random weights while making sure the sum of the weights equals 1.

In [None]:
def gen_random_weights(amount):
    """Return ``amount`` random non-negative weights that sum to one."""
    draws = np.random.random(amount)
    # Normalise the uniform draws by their total.
    return draws / draws.sum()
#print gen_random_weights(amount_of_assets)

The expected annual return is calculated as the amount of yearly trading days times the average daily return: $ E(r_{yearly}) = 252 * E(r_{daily}) $.

The portfolio's expected annual return is given by: $ p = w^T * R_{yearly} $, where w is the weight vector and $ R_{yearly} $ is the expected annual return vector.

In [None]:
def portfolio_return(D_returns, weights):
    """Return the expected annual return of a weighted portfolio.

    The mean return of each column of ``D_returns`` is scaled by 252, weighted
    with ``weights`` and summed.
    """
    weighted_annual = (D_returns.mean() * 252) * weights
    return np.sum(weighted_annual)

# Smoke test: expected annual return of a random portfolio.
portfolio_return(daily_returns, gen_random_weights(amount_of_assets))


A function to calculate the expected volatility of a portfolio. The volatility is modelled as the standard deviation of the portfolio's returns. Its square, the variance, can be calculated as: $$ \sigma^2 = w^T C w $$
Where w is again the weights of the assets and C is the covariance matrix of the returns.

In [None]:
def portfolio_volatility(C_matrix, weights):
    """Return the portfolio volatility sqrt(w^T C w).

    ``weights`` must expose ``.T`` (e.g. a numpy array).
    """
    covariance_times_weights = np.dot(C_matrix, weights)
    variance = np.dot(weights.T, covariance_times_weights)
    return np.sqrt(variance)

# Smoke test: volatility of a random portfolio.
portfolio_volatility(Covariance_matrix, gen_random_weights(amount_of_assets))

In [None]:
def convert_covariance_matrix(C_matrix):
    """Convert a pandas covariance DataFrame into a cvxopt ``matrix``."""
    # cvxopt's matrix is built from the underlying numpy values.
    return matrix(C_matrix.values)
#print convert_covariance_matrix(Covariance_matrix)

In [None]:
def max_return(D_returns):
    """Return the largest annualised (x252) mean return among the columns."""
    return max(D_returns.mean() * 252)
# Smoke test: highest annualised single-asset return in the sample.
max_return(daily_returns)

#### Functions to return the parameters that need to be optimized with their weights as variables 
They are all calculated the same way as the numeric functions defined previously; only the weights are variables instead of numeric values. Parameters that have to be maximized are negated, because the optimizations look for minimal values and the weights that give the maximal value are the same weights that give the minimal negated value.

The return as a function of the weights.

In [None]:
def returns(x):
    """Negated expected annual portfolio return for weight vector ``x``.

    Reads the notebook globals ``daily_returns`` and ``amount_of_assets``; the
    sign is flipped so that a minimiser maximises the return.
    """
    annualised = daily_returns.mean() * 252
    # Only the first amount_of_assets entries of x are used.
    picked = np.array([x[i] for i in range(amount_of_assets)])
    return -np.sum(annualised * picked)

The risk as a function of the weights.

In [None]:
def risk(x):
    """Portfolio volatility sqrt(x^T C x) for weight vector ``x``.

    Reads the notebook globals ``Covariance_matrix`` and ``amount_of_assets``.
    """
    covariance = Covariance_matrix
    # Only the first amount_of_assets entries of x are used.
    picked = np.array([x[i] for i in range(amount_of_assets)])
    variance = np.dot(picked.T, np.dot(covariance, picked))
    return np.sqrt(variance)


The Sharpe ratio as a function of the weights. The Sharpe ratio is the excess return over the risk-free rate divided by the risk.

In [None]:
def HistoricVaR(assets, weights, alpha):
    """Historical Value at Risk of a weighted portfolio.

    Parameters:
        assets: DataFrame of prices, one column per asset.
        weights: one weight per column of ``assets``.
        alpha: tail probability as a fraction, e.g. 0.05 for the worst 5 %
            of daily returns.

    Returns:
        The ``alpha`` quantile of the portfolio's daily returns.
    """
    # np.percentile expects a percentage in [0, 100]; passing the fraction
    # straight through would return the 0.05th percentile instead of the 5th.
    return np.percentile(portfolio_daily_returns(assets, weights), 100.0 * alpha)
HistoricVaR(normalized_data, gen_random_weights(amount_of_assets), 0.05)

This method is more accurate than assuming a normal distribution because returns are not perfectly normal, but it cannot be used for optimization: every portfolio return is a function of the weights, so it is unknown in advance where the $ \alpha $ interval ends.

By assuming the returns follow a normal distribution, the VaR can be calculated as: $ VaR = \mu - F^{-1}(\alpha)* \sigma $. First a function that does this numerically, then one as a function of the weights.

In [None]:
def sharp(x):
    """Negated Sharpe ratio for weight vector ``x``.

    Computes (annual return - risk_free_rate) / volatility from the notebook
    globals ``daily_returns``, ``Covariance_matrix``, ``risk_free_rate`` and
    ``amount_of_assets``; the sign is flipped so that a minimiser maximises
    the ratio.
    """
    annualised = daily_returns.mean() * 252
    covariance = Covariance_matrix
    # Only the first amount_of_assets entries of x are used.
    picked = np.array([x[i] for i in range(amount_of_assets)])
    excess_return = np.sum(annualised * picked) - risk_free_rate
    volatility = np.sqrt(np.dot(picked.T, np.dot(covariance, picked)))
    return -excess_return / volatility

#### Extra functions useful for the optimization


The constraint for the optimization: the sum of the weights must be equal to one. 

In [None]:
def constraint1(x):
    """Equality constraint for the optimizer: zero when the weights sum to one.

    Only the first ``amount_of_assets`` (notebook global) entries of ``x`` are
    summed.
    """
    picked = np.array([x[i] for i in range(amount_of_assets)])
    return np.sum(picked) - 1

A function to generate the valid interval for each weight.