# 1.1 Load libraries

Note: After resolving conflicts, runs the entire notebook to make sure none of the old code is broken.

In [1]:
import pandas as pd
import numpy as np
import os
import cvxpy as cvx

# 1.3 Load the stocks into memory and perform some preprocessing steps

The data is stored as key-value pairs in a dictionary, with the ticker referencing the respective dataframe. Following that, the date is changed into a datetime object and a new column for log-returns is added.

In [2]:
# load stocks into a dictionary
stocks = {}
do_not_append = ['FSELX','IPHI'] # FSELX is our fund, while IPHI does not have data and has been bought over by MRVL
for file_name in os.listdir('data/'):
  ticker = file_name.split(".")[0]
  if ticker in do_not_append:
    pass
  else:
    stocks[f'{ticker}'] = pd.read_csv(f'data/{file_name}') # for each stock, add its ticker as the key and dataframe as the value

In [3]:
print(list(stocks.keys())) # print the stock tickers for easy referencing

['005930', 'ADI', 'AEIS', 'AMAT', 'AMBA', 'AMD', 'AOSL', 'ASX', 'AVGO', 'CRUS', 'DIOD', 'ENPH', 'FLEX', 'IIVI', 'INTC', 'JBL', 'LRCX', 'MCHP', 'MRVL', 'MTSI', 'MU', 'MXL', 'NVDA', 'NXPI', 'OLED', 'ON', 'QCOM', 'SEDG', 'SGH', 'SMTC', 'SYNA', 'TSM', 'TTMI', 'TXN', 'XLNX']


In [4]:
# Change date column into a datetime object
# Add a new column for log returns
for ticker, df in stocks.items():
  df['Date'] = df['Date'].apply(pd.to_datetime)
  df['LogReturns'] = np.append(np.nan,np.diff(np.log(df['Adj Close']))) # Fill dataframe with the log returns. The first value will be nan because there is no log returns for it.

In [5]:
# preview a stock
stocks['TXN'].head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,LogReturns
0,2016-10-04,69.660004,69.910004,69.099998,69.589996,61.00631,4089500,
1,2016-10-05,69.849998,70.839996,69.010002,70.599998,61.891724,4789600,0.014409
2,2016-10-06,70.440002,71.0,70.029999,70.959999,62.207321,4061500,0.005086
3,2016-10-07,71.0,71.059998,70.449997,70.870003,62.128426,3673600,-0.001269
4,2016-10-10,71.080002,71.330002,69.900002,69.93,61.304375,4893200,-0.013352


In [6]:
top_30 = ['SYNA', 'MRVL', 'SGH', 'ENPH', 'JBL', 'CRUS', 'MCHP', 'AEIS', 'MTSI', 'AMAT', 'NVDA', 'ON', 'TTMI', 'SMTC', 'AVGO', 'XLNX', 'TSM', 'IIVI', 'QCOM', 'NXPI', 'FLEX', 'DIOD', 'AMD', 'MXL', 'ADI', 'INTC', 'TXN', 'LRCX', 'MU', 'OLED']
stocks_30 = {stock:df for stock,df in stocks.items() if stock in top_30}

# 1.4 Aggregate the log-returns into one dataframe

In [7]:
log_returns = pd.DataFrame(data=np.full((stocks['TXN'].shape[0],30),np.nan), columns = sorted(list(stocks_30.keys())), index = stocks_30['TXN'].Date) # create empty dataframe filled with NaNs, with index = Date and columns = stocks

# copy the log returns over from the stocks dictionary
for ticker, df in stocks_30.items():
  log_returns[ticker] = log_returns.index.map(stocks_30[ticker].set_index('Date')['LogReturns'])

# if simple returns are needed
simple_returns = np.exp(log_returns)-1
simple_returns['RF'] = np.repeat(0.0001,simple_returns.shape[0])

In [8]:
# preview log_returns
simple_returns.head()

Unnamed: 0_level_0,ADI,AEIS,AMAT,AMD,AVGO,CRUS,DIOD,ENPH,FLEX,IIVI,...,ON,QCOM,SGH,SMTC,SYNA,TSM,TTMI,TXN,XLNX,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-10-04,,,,,,,,,,,...,,,,,,,,,,0.0001
2016-10-05,0.006849,0.008323,0.011145,-0.02726,0.026205,0.018109,0.024402,-0.008333,0.030837,0.0431,...,0.018062,0.002399,,0.008727,0.049058,0.004607,0.019164,0.014513,0.006596,0.0001
2016-10-06,0.000928,0.027513,-0.00167,0.026549,0.004093,0.012919,0.00327,0.016807,0.004986,0.026619,...,0.002419,0.010473,,-0.002884,0.054531,0.007861,0.001709,0.005099,0.000375,0.0001
2016-10-07,-0.004943,-0.015448,-0.004684,-0.030172,0.000115,-0.000739,-0.01676,-0.041322,-0.000709,-0.005418,...,-0.012068,0.009624,,-0.0047,0.017339,0.013325,0.014505,-0.001268,-0.010107,0.0001
2016-10-10,-0.01661,0.004184,-0.012437,0.013333,0.003214,0.023122,-0.007102,-0.086207,-0.004965,0.003891,...,-0.012215,-0.013785,,-0.027243,0.01463,0.003207,0.0,-0.013264,-0.012857,0.0001


The stock SGH has a lot of NaN values. We take a closer look at it.

In [9]:
# preview SGH
stocks['SGH'].head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,LogReturns
0,2017-05-24,12.0,14.0,11.5,13.45,13.45,1415200,
1,2017-05-25,13.85,14.32,13.35,13.5,13.5,237900,0.003711
2,2017-05-26,13.36,13.624,12.46,13.23,13.23,56200,-0.020203
3,2017-05-30,13.2,13.255,12.28,12.98,12.98,71200,-0.019077
4,2017-05-31,13.02,13.5,12.93,13.1,13.1,55700,0.009203


SGH only begins at a later date, explaining the NaNs we see in our earlier preview.

## 2.1 Optimization

In [193]:
# drop nans
simple_returns = simple_returns.dropna()

# get in-sample time window to perform optimization on (inclusive)
is_start_date = '2019-6-30'
is_end_date = '2020-6-30'

# get out-of-sample time window for evaluation (inclusive)
oos_start_date = '2020-7-1'
oos_end_date = '2020-12-31'

in_sample_window = simple_returns[(simple_returns.index>=is_start_date)&(simple_returns.index<=is_end_date)].copy()
out_sample_window = simple_returns[(simple_returns.index>=oos_start_date)&(simple_returns.index<=oos_end_date)].copy()

In [194]:
from pypfopt import EfficientFrontier, risk_models, expected_returns, objective_functions, base_optimizer

In [195]:
# Calculate expected returns and sample covariance using PyPortfolioOpt’s built-in methods
mu = expected_returns.mean_historical_return(in_sample_window, returns_data=True)
S = risk_models.sample_cov(in_sample_window, returns_data=True)

oos_mu = expected_returns.mean_historical_return(out_sample_window, returns_data=True)
oos_S = risk_models.sample_cov(out_sample_window, returns_data=True)

#print('Expected Returns:')
#print(mu)
#print('\n')
#print('Covariance Matrix:')
#print(S)
solver = "ECOS"

# Attempt 1:
* Simple sub-sectors, semiconductors and others
* L2 regularization with gamma = 5

**From the pyportfolio webpage**

*In order to coerce the mean-variance optimizer to produce more non-negligible weights, we add what can be thought of as a “small weights penalty” to all of the objective functions, parameterised by γ (gamma).*

It is refered to as L2 regularisation despite having a different purpose from its usual use, to keep weights small. Here, is it used to keep weights bigger.

Note: Check different gamma values and portfolio's overall performance.

https://pyportfolioopt.readthedocs.io/en/latest/MeanVariance.html#pypfopt.objective_functions.L2_reg

In [196]:
# Set each stock to a sub-sector 
# Until someone has a better idea, use:
semiconductors = ['ADI','AMAT','AMD','AVGO','CRUS',
                  'DIOD','INTC','LRCX','MCHP','MRVL',
                  'MTSI','MU','MXL','NVDA','NXPI',
                  'OLED','ON','QCOM','SGH','SMTC',
                  'SYNA','TSM','TXN','XLNX']
others = ['AEIS','ENPH','FLEX','JBL','TTMI', 
               'IIVI','RF'] # AEIS and IIVI are also in the semiconductor subsector, but just putting them here first since they're in both areas. Every stock here except for IIVI are in the electrical/electronics scene. RF = risk-free

**Calculating risk aversion**

$U=E(R)-k*std(R)^2$

Assuming a portfolio consisting only of the market portfolio + risk-free assets, and expressing $E(R)$ and $std(R)$ in terms of their weights, $w$, we have:

$U=0.1*w+0.01*(1-w) - k*(w*0.18)^2$

where $w$ is the weight allocated to the market portfolio. Then,

$dU/dw = -0.0648*w*k+0.099$

If we choose w = 1, and then solving for $dU/dw = 0$, we get $k=1.528$

In [197]:
ef = EfficientFrontier(mu, S, solver= solver)
k = 1.528 
print(f"k = {k}")

ef.add_constraint(lambda x: x >= 0) # weights >= 0
ef.add_constraint(lambda x: x <= 0.25) # weight <= 0.25

sector_mapper = {} # initialise sector_mapper
for stock in semiconductors:
  sector_mapper[stock] = 'semiconductors'
for stock in others:
  sector_mapper[stock] = 'others'

sector_lower = {'others': 0.3} # at least 30% to others

ef.add_sector_constraints(sector_mapper=sector_mapper, sector_lower=sector_lower, sector_upper={}) # add sector constraint

ef.add_objective(objective_functions.L2_reg, gamma=5) # L2 regularisation parameter, defaults to 1. This adds a small weights penalty, gamma, to the objective function. Increase for more non-negligible weights. 

optimal_weights_portfolio = ef.max_quadratic_utility(risk_aversion = 2*k) # optimize by the quadratic utility function mean - k/2 * variance (hence the 2*k in the parameters)
ef.portfolio_performance(verbose=True, risk_free_rate=0.01) 
results = pd.Series(np.round(np.array(list(optimal_weights_portfolio.values())),4),index = in_sample_window.columns)
results

k = 1.528
Expected annual return: 79.9%
Annual volatility: 45.4%
Sharpe Ratio: 1.74


ADI     0.0021
AEIS    0.0153
AMAT    0.0166
AMD     0.0648
AVGO    0.0066
CRUS    0.0255
DIOD    0.0328
ENPH    0.1254
FLEX    0.0078
IIVI    0.0391
INTC    0.0246
JBL     0.0109
LRCX    0.0510
MCHP    0.0027
MRVL    0.0415
MTSI    0.1082
MU      0.0163
MXL     0.0000
NVDA    0.1170
NXPI    0.0025
OLED    0.0000
ON      0.0000
QCOM    0.0202
SGH     0.0000
SMTC    0.0000
SYNA    0.0966
TSM     0.0572
TTMI    0.0287
TXN     0.0140
XLNX    0.0000
RF      0.0727
dtype: float64

In [198]:
# check against out of sample window
_ = base_optimizer.portfolio_performance(results, oos_mu, oos_S, verbose=True, risk_free_rate=0.01)

Expected annual return: 255.1%
Annual volatility: 29.6%
Sharpe Ratio: 8.57


# Attempt 2:
- Sub-sectors by market cap, with floor = 30%, and ceiling = 40%
- using L2 regularization with gamma = 5

**From the pyportfolio webpage**

*In order to coerce the mean-variance optimizer to produce more non-negligible weights, we add what can be thought of as a “small weights penalty” to all of the objective functions, parameterised by γ (gamma).*

It is refered to as L2 regularisation despite having a different purpose from its usual use, to keep weights small. Here, is it used to keep weights bigger.

Note: Check different gamma values and portfolio's overall performance.

https://pyportfolioopt.readthedocs.io/en/latest/MeanVariance.html#pypfopt.objective_functions.L2_reg

In [199]:
# market cap by three categories, 100B+, 10B+, and 1B+
cat_1 = ["TSM", "NVDA", "INTC", "AVGO", "TXN", "QCOM", "AMD", "AMAT"] # 100B+ market cap
cat_2 = ["ADI", "MU", "LRCX", "MRVL", "NXPI", "MCHP", "XLNX", "ENPH", "ON"] # 10B+ market cap
cat_3 = ["JBL", "FLEX", "OLED", "SYNA", "IIVI", "SMTC", "CRUS", "MTSI", "DIOD", "MXL", "AEIS", "TTMI", "SGH"] # 1B+ market cap

**Calculating risk aversion**

$U=E(R)-k*std(R)^2$

Assuming a portfolio consisting only of the market portfolio + risk-free assets, and expressing $E(R)$ and $std(R)$ in terms of their weights, $w$, we have:

$U=0.1*w+0.01*(1-w) - k*(w*0.18)^2$

where $w$ is the weight allocated to the market portfolio. Then,

$dU/dw = -0.0648*w*k+0.099$

If we choose w = 1, and then solving for $dU/dw = 0$, we get $k=1.528$

In [200]:
ef = EfficientFrontier(mu, S, solver= solver)
k = 1.528 
print(f"k = {k}")

ef.add_constraint(lambda x: x >= 0) # weights >= 0
ef.add_constraint(lambda x: x <= 0.25) # weight <= 0.25

sector_mapper = {} # initialise sector_mapper
for stock in cat_1:
  sector_mapper[stock] = 'cat_1'
for stock in cat_2:
  sector_mapper[stock] = 'cat_2'
for stock in cat_3:
  sector_mapper[stock] = 'cat_3'
sector_mapper['RF'] = 'rf'

sector_lower = {'cat_1': 0.3, 'cat_2': 0.3, 'cat_3': 0.3} # at least 30% to each category
sector_upper = {'cat_1': 0.4, 'cat_2': 0.4, 'cat_3': 0.4} # not more than 40% to each category

ef.add_sector_constraints(sector_mapper=sector_mapper, sector_lower=sector_lower, sector_upper=sector_upper) # add sector constraint

ef.add_objective(objective_functions.L2_reg, gamma=5) # L2 regularisation parameter, defaults to 1. This adds a small weights penalty, gamma, to the objective function. Increase for more non-negligible weights. 

optimal_weights_portfolio = ef.max_quadratic_utility(risk_aversion = 2*k) # optimize by the quadratic utility function mean - k/2 * variance (hence the 2*k in the parameters)
ef.portfolio_performance(verbose=True, risk_free_rate=0.01) 
results = pd.Series(np.round(np.array(list(optimal_weights_portfolio.values())),4),index = in_sample_window.columns)
print(results)
print('\n')
print(f'CAT 1: {results.loc[cat_1].sum()}, CAT 2: {results.loc[cat_2].sum()}, CAT 3: {results.loc[cat_3].sum()}')


k = 1.528
Expected annual return: 81.3%
Annual volatility: 46.0%
Sharpe Ratio: 1.75
ADI     0.0131
AEIS    0.0000
AMAT    0.0182
AMD     0.0664
AVGO    0.0086
CRUS    0.0275
DIOD    0.0350
ENPH    0.1194
FLEX    0.0000
IIVI    0.0234
INTC    0.0263
JBL     0.0000
LRCX    0.0616
MCHP    0.0133
MRVL    0.0524
MTSI    0.1102
MU      0.0269
MXL     0.0000
NVDA    0.1186
NXPI    0.0133
OLED    0.0000
ON      0.0000
QCOM    0.0222
SGH     0.0002
SMTC    0.0000
SYNA    0.0987
TSM     0.0593
TTMI    0.0128
TXN     0.0159
XLNX    0.0000
RF      0.0568
dtype: float64


CAT 1: 0.3355, CAT 2: 0.30000000000000004, CAT 3: 0.3078


In [201]:
# check against out of sample window
_ = base_optimizer.portfolio_performance(results, oos_mu, oos_S, verbose=True, risk_free_rate=0.01)

Expected annual return: 249.2%
Annual volatility: 29.8%
Sharpe Ratio: 8.33


# Attempt 3
- L2 regularization with gamma = 5
- Exponentially weighted mean and covariance
- Sub-sectors by market cap


In [202]:
adj_mu = expected_returns.ema_historical_return(in_sample_window, span=100, returns_data=True)
adj_S = risk_models.exp_cov(in_sample_window, span=100, returns_data=True)

# Shrinkage estimator, as recommended by PyPortfolioOpt's dev, overall barely any difference with normal cov
#adj_S = risk_models.CovarianceShrinkage(in_sample_window, returns_data=True).ledoit_wolf()

In [203]:
ef = EfficientFrontier(adj_mu, adj_S, solver= solver)
k = 1.528 
print(f"k = {k}")

ef.add_constraint(lambda x: x >= 0) # weights >= 0
ef.add_constraint(lambda x: x <= 0.25) # weight <= 0.25

sector_mapper = {} # initialise sector_mapper
for stock in cat_1:
  sector_mapper[stock] = 'cat_1'
for stock in cat_2:
  sector_mapper[stock] = 'cat_2'
for stock in cat_3:
  sector_mapper[stock] = 'cat_3'
sector_mapper['RF'] = 'rf'

sector_lower = {'cat_1': 0.3, 'cat_2': 0.3, 'cat_3': 0.3} # at least 30% to each category
sector_upper = {'cat_1': 0.4, 'cat_2': 0.4, 'cat_3': 0.4} # not more than 40% to each category

ef.add_sector_constraints(sector_mapper=sector_mapper, sector_lower=sector_lower, sector_upper=sector_upper) # add sector constraint

ef.add_objective(objective_functions.L2_reg, gamma=5) # L2 regularisation parameter, defaults to 1. This adds a small weights penalty, gamma, to the objective function. Increase for more non-negligible weights. 

optimal_weights_portfolio = ef.max_quadratic_utility(risk_aversion = 2*k) # optimize by the quadratic utility function mean - k/2 * variance (hence the 2*k in the parameters)
#ef.portfolio_performance(verbose=True, risk_free_rate=0.01) # can't use this anymore, its adjusted mean
_ = base_optimizer.portfolio_performance(results, mu, S, verbose=True, risk_free_rate=0.01)
results = pd.Series(np.round(np.array(list(optimal_weights_portfolio.values())),4),index = in_sample_window.columns)
print(results)
print('\n')
print(f'CAT 1: {results.loc[cat_1].sum()}, CAT 2: {results.loc[cat_2].sum()}, CAT 3: {results.loc[cat_3].sum()}')


k = 1.528
Expected annual return: 81.3%
Annual volatility: 46.0%
Sharpe Ratio: 1.75
ADI     0.0000
AEIS    0.0000
AMAT    0.0083
AMD     0.0000
AVGO    0.0381
CRUS    0.0000
DIOD    0.0000
ENPH    0.0723
FLEX    0.0000
IIVI    0.1398
INTC    0.0000
JBL     0.0000
LRCX    0.0523
MCHP    0.0146
MRVL    0.1303
MTSI    0.1094
MU      0.0000
MXL     0.1507
NVDA    0.2018
NXPI    0.0000
OLED    0.0000
ON      0.0305
QCOM    0.0437
SGH     0.0000
SMTC    0.0000
SYNA    0.0000
TSM     0.0080
TTMI    0.0000
TXN     0.0000
XLNX    0.0000
RF      0.0000
dtype: float64


CAT 1: 0.2999, CAT 2: 0.29999999999999993, CAT 3: 0.39990000000000003


In [204]:
# check against out of sample window
_ = base_optimizer.portfolio_performance(results, oos_mu, oos_S, verbose=True, risk_free_rate=0.01)

Expected annual return: 212.6%
Annual volatility: 34.1%
Sharpe Ratio: 6.21


# Attempt 4
- L2 regularization with gamma = 10
- Exponentially weighted mean and variance
- sub-sectors by market cap

In [205]:
adj_mu = expected_returns.ema_historical_return(in_sample_window, span=100, returns_data=True)
adj_S = risk_models.exp_cov(in_sample_window, span=100, returns_data=True)

ef = EfficientFrontier(adj_mu, adj_S, solver= solver)
k = 1.528 
print(f"k = {k}")

ef.add_constraint(lambda x: x >= 0) # weights >= 0
ef.add_constraint(lambda x: x <= 0.25) # weight <= 0.25

sector_mapper = {} # initialise sector_mapper
for stock in cat_1:
  sector_mapper[stock] = 'cat_1'
for stock in cat_2:
  sector_mapper[stock] = 'cat_2'
for stock in cat_3:
  sector_mapper[stock] = 'cat_3'
sector_mapper['RF'] = 'rf'

sector_lower = {'cat_1': 0.2, 'cat_2': 0.2, 'cat_3': 0.2} # at least 30% to each category
sector_upper = {'cat_1': 0.5, 'cat_2': 0.5, 'cat_3': 0.5} # not more than 40% to each category

ef.add_sector_constraints(sector_mapper=sector_mapper, sector_lower=sector_lower, sector_upper=sector_upper) # add sector constraint

ef.add_objective(objective_functions.L2_reg, gamma=10) # L2 regularisation parameter, defaults to 1. This adds a small weights penalty, gamma, to the objective function. Increase for more non-negligible weights. 

optimal_weights_portfolio = ef.max_quadratic_utility(risk_aversion = 2*k) # optimize by the quadratic utility function mean - k/2 * variance (hence the 2*k in the parameters)
#ef.portfolio_performance(verbose=True, risk_free_rate=0.01) # can't use this anymore, its adjusted mean
_ = base_optimizer.portfolio_performance(results, mu, S, verbose=True, risk_free_rate=0.01)
results = pd.Series(np.round(np.array(list(optimal_weights_portfolio.values())),4),index = in_sample_window.columns)
print(results)
print('\n')
print(f'CAT 1: {results.loc[cat_1].sum()}, CAT 2: {results.loc[cat_2].sum()}, CAT 3: {results.loc[cat_3].sum()}')
# check against out of sample window
print('\n')
_ = base_optimizer.portfolio_performance(results, oos_mu, oos_S, verbose=True, risk_free_rate=0.01)

k = 1.528
Expected annual return: 67.4%
Annual volatility: 51.2%
Sharpe Ratio: 1.30
ADI     0.0202
AEIS    0.0248
AMAT    0.0162
AMD     0.0033
AVGO    0.0316
CRUS    0.0000
DIOD    0.0074
ENPH    0.0714
FLEX    0.0000
IIVI    0.1077
INTC    0.0000
JBL     0.0000
LRCX    0.0606
MCHP    0.0414
MRVL    0.1008
MTSI    0.0915
MU      0.0047
MXL     0.1124
NVDA    0.1152
NXPI    0.0142
OLED    0.0000
ON      0.0490
QCOM    0.0344
SGH     0.0000
SMTC    0.0239
SYNA    0.0000
TSM     0.0164
TTMI    0.0000
TXN     0.0107
XLNX    0.0253
RF      0.0170
dtype: float64


CAT 1: 0.2278, CAT 2: 0.3876, CAT 3: 0.36769999999999997


Expected annual return: 204.8%
Annual volatility: 31.6%
Sharpe Ratio: 6.44


# Attempt 5
- L2 regularization with gamma = 5
- sub-sectors by market cap, with floor = 20%, ceiling = 50%

In [206]:
ef = EfficientFrontier(mu, S, solver= solver)
k = 1.528 
print(f"k = {k}")

ef.add_constraint(lambda x: x >= 0) # weights >= 0
ef.add_constraint(lambda x: x <= 0.25) # weight <= 0.25

sector_mapper = {} # initialise sector_mapper
for stock in cat_1:
  sector_mapper[stock] = 'cat_1'
for stock in cat_2:
  sector_mapper[stock] = 'cat_2'
for stock in cat_3:
  sector_mapper[stock] = 'cat_3'
sector_mapper['RF'] = 'rf'

sector_lower = {'cat_1': 0.2, 'cat_2': 0.2, 'cat_3': 0.2} # at least 30% to each category
sector_upper = {'cat_1': 0.5, 'cat_2': 0.5, 'cat_3': 0.5} # not more than 40% to each category

ef.add_sector_constraints(sector_mapper=sector_mapper, sector_lower=sector_lower, sector_upper=sector_upper) # add sector constraint

ef.add_objective(objective_functions.L2_reg, gamma=5) # L2 regularisation parameter, defaults to 1. This adds a small weights penalty, gamma, to the objective function. Increase for more non-negligible weights. 

optimal_weights_portfolio = ef.max_quadratic_utility(risk_aversion = 2*k) # optimize by the quadratic utility function mean - k/2 * variance (hence the 2*k in the parameters)
ef.portfolio_performance(verbose=True, risk_free_rate=0.01) 
results = pd.Series(np.round(np.array(list(optimal_weights_portfolio.values())),4),index = in_sample_window.columns)
print(results)
print('\n')
print(f'CAT 1: {results.loc[cat_1].sum()}, CAT 2: {results.loc[cat_2].sum()}, CAT 3: {results.loc[cat_3].sum()}')
_=base_optimizer.portfolio_performance(results, oos_mu, oos_S, verbose=True, risk_free_rate=0.01)

k = 1.528
Expected annual return: 81.2%
Annual volatility: 45.6%
Sharpe Ratio: 1.76
ADI     0.0067
AEIS    0.0017
AMAT    0.0211
AMD     0.0688
AVGO    0.0112
CRUS    0.0301
DIOD    0.0376
ENPH    0.1143
FLEX    0.0000
IIVI    0.0259
INTC    0.0288
JBL     0.0000
LRCX    0.0554
MCHP    0.0071
MRVL    0.0460
MTSI    0.1129
MU      0.0207
MXL     0.0000
NVDA    0.1213
NXPI    0.0072
OLED    0.0000
ON      0.0000
QCOM    0.0247
SGH     0.0028
SMTC    0.0000
SYNA    0.1011
TSM     0.0618
TTMI    0.0153
TXN     0.0184
XLNX    0.0000
RF      0.0590
dtype: float64


CAT 1: 0.3561, CAT 2: 0.25739999999999996, CAT 3: 0.3274
Expected annual return: 244.4%
Annual volatility: 29.7%
Sharpe Ratio: 8.20


## 3. Evaluation

Add any other relevant data.
Even if we don't need to compare old vs new portfolio, we still need this data for VaR calculations

In [76]:
ef.portfolio_performance(verbose=True, risk_free_rate=0.01) 
results

Expected annual return: 79.6%
Annual volatility: 50.4%
Sharpe Ratio: 1.56


ADI     0.0061
AEIS    0.0000
AMAT    0.0174
AMD     0.0719
AVGO    0.0115
CRUS    0.0293
DIOD    0.0363
ENPH    0.1065
FLEX    0.0000
IIVI    0.0283
INTC    0.0296
JBL     0.0000
LRCX    0.0492
MCHP    0.0039
MRVL    0.0493
MTSI    0.1097
MU      0.0190
MXL     0.0000
NVDA    0.1231
NXPI    0.0014
OLED    0.0000
ON      0.0000
QCOM    0.0268
SGH     0.0000
SMTC    0.0000
SYNA    0.1003
TSM     0.0677
TTMI    0.0160
TXN     0.0211
XLNX    0.0000
RF      0.0757
dtype: float64

In [77]:
original_portfolio = pd.read_csv('original_weights_best.csv')
#original_portfolio['Security\'s Percentage of the Total Net Assets'] = np.round(original_portfolio['Security\'s Percentage of the Total Net Assets'],3)/100
original_portfolio.index = original_portfolio['Ticker Symbol Given by the Exchange']
del original_portfolio['Ticker Symbol Given by the Exchange']
original_portfolio = original_portfolio.loc[top_30,]

In [78]:
# normalize top 30 weights
original_portfolio['Security\'s Percentage of the Total Net Assets'] = (original_portfolio['Security\'s Percentage of the Total Net Assets']/original_portfolio['Security\'s Percentage of the Total Net Assets'].sum())

In [79]:
# performance to beat
_=base_optimizer.portfolio_performance(original_portfolio['Security\'s Percentage of the Total Net Assets'], oos_mu[oos_mu.index != 'RF'], oos_S.iloc[oos_S.index != 'RF',oos_S.columns != 'RF'], verbose=True, risk_free_rate=0.01)

Expected annual return: 94.7%
Annual volatility: 26.0%
Sharpe Ratio: 3.61


In [80]:
# performance of new portfolio weights
_=base_optimizer.portfolio_performance(results, oos_mu, oos_S, verbose=True, risk_free_rate=0.01)

Expected annual return: 235.4%
Annual volatility: 29.1%
Sharpe Ratio: 8.05


In [86]:
comparison_table = pd.DataFrame([original_portfolio['Security\'s Percentage of the Total Net Assets'],results],index=['original weights', 'new weights']).T
comparison_table.loc['RF','original weights'] = 0
comparison_table

Unnamed: 0,original weights,new weights
SYNA,0.005068,0.1003
MRVL,0.055538,0.0493
SGH,0.004757,0.0
ENPH,0.029372,0.1065
JBL,0.014479,0.0
CRUS,0.021512,0.0293
MCHP,0.049436,0.0039
AEIS,0.00755,0.0
MTSI,0.007033,0.1097
AMAT,0.022029,0.0174


# Q4 VaR and ES


In [188]:
def get_historical_risk(returns_df, rolling_window_size, dates, weights):
    '''
    Function to obtain the historical risk given the dates provided. Assumes 5% VaR and 5% ES.
    
    Parameters:
    - returns_df (pd.DataFrame): Dataframe of the daily percentage returns. Tickers in the columns, and dates in the index. 
    - rolling_window_size (int): Integer ditating the size of the rolling window to consider when calculating the historical risk.
    - dates (list-like): Integer or list-like object containing the dates to calculate historical VaR for.
    - weights (pd.Series): Pandas series object containing the weights of each investment. Index is the ticker names for each investment.
    
    Returns:
    - risk_df (pd.DataFrame): Dataframe of the VaR and ES of each provided date. Dates are the index.
    '''
    
    if isinstance(dates, dict):
        if ('start_date' or 'end_date') not in dates.keys():
            raise ValueError("Missing start_date or end_date variable.")
        start_date = dates['start_date']
        end_date = dates['end_date']
        dates = returns_df[(returns_df.index>=start_date)&(returns_df.index<=end_date)].index.values
    
    elif not isinstance(dates, list):
        dates = [dates,]
    
    not_found = [date for date in dates if date not in returns_df.index]
    if any(not_found):
        raise ValueError(f"{not_found} not found in provided data")
        
    historical_returns = (returns_df*weights).sum(axis=1) # calculate the historical returns of the portfolio after weighting
    historical_var = np.zeros(len(dates)) # create empty array for fast data read/write
    historical_ES = np.zeros(len(dates)) # create empty array for fast data read/write
    
    for day_index in range(len(dates)): # for each date
        date = dates[day_index]
        rolling_window = historical_returns[historical_returns.index<date].iloc[-rolling_window_size:]
        historical_var[day_index] = np.percentile(rolling_window,5) # get 5% percentile for VaR
        sum_of_loss_returns = np.sum(rolling_window.iloc[np.where(rolling_window < historical_var[day_index])]) # formula for ES, select all values lower than the 5% VaR and sum them.
        historical_ES[day_index] = sum_of_loss_returns/(rolling_window_size*0.05) # formula for ES
    
    risk_df = pd.DataFrame([historical_var,historical_ES],index=['5% VaR', '5% ES'],columns=dates).T
    
    return risk_df
        

In [191]:
historical_risk_values = get_historical_risk(returns_df=simple_returns, rolling_window_size=250, dates={"start_date":"2019-1-1","end_date":"2019-12-31"}, weights=results)
historical_risk_values

Unnamed: 0,5% VaR,5% ES
2019-01-02,-0.031317,-0.043424
2019-01-03,-0.031317,-0.043424
2019-01-04,-0.032153,-0.045187
2019-01-07,-0.032153,-0.045187
2019-01-08,-0.032153,-0.045187
...,...,...
2019-12-24,-0.023719,-0.035606
2019-12-26,-0.023719,-0.035606
2019-12-27,-0.023719,-0.035606
2019-12-30,-0.023719,-0.035606


In [227]:
# NOTE: Not sure if we need to log for force normality
from scipy.stats import norm
def get_parametric_risk(returns_df, rolling_window_size, dates, weights):
    '''
    Function to obtain the parametric risk given the dates provided. Assumes 5% VaR and 5% ES, and normal distribution.
    
    Parameters:
    - returns_df (pd.DataFrame): Dataframe of the daily percentage returns. Tickers in the columns, and dates in the index. 
    - rolling_window_size (int): Integer ditating the size of the rolling window to consider when calculating the historical risk.
    - dates (list-like): Integer or list-like object containing the dates to calculate historical VaR for.
    - weights (pd.Series): Pandas series object containing the weights of each investment. Index is the ticker names for each investment.
    
    Returns:
    - risk_df (pd.DataFrame): Dataframe of the VaR and ES of each provided date. Dates are the index.
    '''
    
    if isinstance(dates, dict):
        if ('start_date' or 'end_date') not in dates.keys():
            raise ValueError("Missing start_date or end_date variable.")
        start_date = dates['start_date']
        end_date = dates['end_date']
        dates = returns_df[(returns_df.index>=start_date)&(returns_df.index<=end_date)].index.values
    
    elif not isinstance(dates, list):
        dates = [dates,]
    
    not_found = [date for date in dates if date not in returns_df.index]
    if any(not_found):
        raise ValueError(f"{not_found} not found in provided data")
        
    parametric_returns = (returns_df*weights).sum(axis=1) # calculate the historical returns of the portfolio after weighting
    parametric_var = np.zeros(len(dates)) # create empty array for fast data read/write
    parametric_ES = np.zeros(len(dates)) # create empty array for fast data read/write
    
    for day_index in range(len(dates)): # for each date
        date = dates[day_index]
        
        individual_window = returns_df[returns_df.index<date].iloc[-rolling_window_size:].values # collect the values for the logreturns for each individual stock, inclusive of the day itself
        
        # calculate mean
        mu = expected_returns.mean_historical_return(individual_window, returns_data=True) 
        mean_return = (mu.values*weights).sum()
        
        # calculate variance 
        S = risk_models.sample_cov(individual_window, returns_data=True) # get covariance matrix
        variance_return = np.dot(weights, np.dot(S, weights)) # calculate portfolio variance

        # calculate VaR and ES
        volatility = variance_return**0.5
        z_alpha = norm.ppf(0.05)
        parametric_var[day_index] = mean_return + volatility*z_alpha
        parametric_ES[day_index] = mean_return-volatility*(norm.pdf(norm.ppf(0.05))/0.05)
    
    risk_df = pd.DataFrame([parametric_var,parametric_ES],index=['5% VaR', '5% ES'],columns=dates).T
    
    return risk_df

In [228]:
parametric_risk_values = get_parametric_risk(returns_df=simple_returns, rolling_window_size=250, dates={"start_date":"2019-1-1","end_date":"2019-12-31"}, weights=results)
parametric_risk_values

-0.0477888139088524
0.3030346286413673
1.133986988637231
0.26231751103328804




Unnamed: 0,5% VaR,5% ES
2019-1-2,-0.546236,-0.672862
2019-12-31,0.702513,0.592901
