Disclaimer: I am not a financial advisor. Do not take anything in this code as financial advice, ever.
Do your own research.
Consult a professional investment advisor before making any investment decisions!

This code uses the Markowitz Efficient Frontier to optimize a stock portfolio.
- [Efficient Frontier](https://www.investopedia.com/terms/e/efficientfrontier.asp)
- [Markowitz Efficient Set](https://www.investopedia.com/terms/m/markowitzefficientset.asp)
- [Efficient Frontier - Wikipedia](https://en.wikipedia.org/wiki/Efficient_frontier)

## Dependencies

In [0]:
# The install commands are wrapped in a string literal so that they do not run.
# To install the dependencies, execute the lines below in a notebook cell
# without the surrounding quotes.
'''
!pip install investpy
!pip install pandas
!pip install yfinance
!pip install PyPortfolioOpt
!pip install vectorbt
'''


## Imports

In [0]:
import investpy as inv
import pandas as pd
import yfinance as yf
from collections import OrderedDict
import matplotlib.pyplot as plt
import numpy as np
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import CovarianceShrinkage, risk_models, expected_returns, objective_functions, plotting
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
import vectorbt as vbt

## Config

In [0]:
# Total amount to invest. It is handed to DiscreteAllocation as
# total_portfolio_value, and the leftover cash is printed with an "R$" prefix.
portfolio_value = 20000

## Stock List

In [0]:
# Alternative universe (disabled): every stock investpy lists for Brazil.
#lista_tickers = inv.get_stocks_list('brazil')

# Source: https://infograficos.valor.globo.com/carteira-valor/
# november-22
#
# Removed:
# 1 Failed download:
# - AURE3.SA: Data doesn't exist for startDate = 1514775600, endDate = 1640919600

# Raw list as collected from the source; it intentionally still contains
# duplicates, which are removed right below.
lista_tickers = ['ITUB4','VALE3', 'B3SA3','MULT3','BBDC4','LREN3','BPAC11','ITSA4','ALSO3', 'ANIM3','CYRE3','SBSP3','SOMA3','RENT3','MULT3','RDOR3','CCRO3','CIEL3',
'CASH3','AGRO3','HYPE3', 'PRIO3', 'UNIP6', 'ELET3', 'PCAR3', 'RDOR3', 'WEGE3', 'ABEV3', 'LREN3', 'TOTS3','CYRE3', 'GGBR4', 'RADL3', 'COGN3', 'CURY3','MGLU3','KLBN11','MRVE3', 'YDUQ3',
'BBDC4', 'ELET3', 'MULT3', 'STBP3', 'VBBR3', 'CCRO3', 'MDIA3', 'VBBR3', 'ALSO3', 'BPAC11', 'UGPA3', 'SIMH3', 'CPLE6', 'ODPV3', 'TAEE11', 'HYPE3', 'PSSA3', 'TOTS3', 'AMER3',
'SLCE3', 'GGBR4', 'EMBR3', 'MYPK3', 'TRPL4', 'SLCE3', 'ALSO3', 'EGIE3', 'RANI3', 'RAPT4', 'EQTL3', 'JBSS3', 'ASAI3', 'IGTI11' ]

# dict.fromkeys removes duplicates while keeping first-seen order, so the
# resulting list is identical on every run (a plain set() would reorder the
# tickers arbitrarily between interpreter sessions).
# Each ticker then gets the '.SA' suffix used by the yf.download calls
# (presumably Yahoo Finance's suffix for the Brazilian exchange - confirm).
lista_tickers = [f'{ticker}.SA' for ticker in dict.fromkeys(lista_tickers)]

# Number of distinct tickers (shown as the cell output).
len(lista_tickers)

In [0]:
# In-sample price history for every ticker in lista_tickers.
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, in which case the 'Adj Close' column is not returned and
# the column selection below raises KeyError.
df = yf.download(lista_tickers, start='2021-11-22', end='2022-05-22', auto_adjust=False)['Adj Close']
# Keep only the dates on which every ticker has a price.
df = df.dropna()
df.head()

In [0]:
# Out-of-sample price history: same tickers, the period that follows `df`.
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, in which case the 'Adj Close' column is not returned and
# the column selection below raises KeyError.
df_out = yf.download(lista_tickers, start='2022-05-23', end='2022-11-23', auto_adjust=False)['Adj Close']
# Keep only the dates on which every ticker has a price.
df_out = df_out.dropna()
df_out.head()

## Efficient Frontier 1

In [0]:

# Expected return per ticker, estimated from the in-sample price history.
mean = expected_returns.mean_historical_return(df)

# Ledoit-Wolf shrinkage estimate of the covariance matrix of the same prices.
s = CovarianceShrinkage(df).ledoit_wolf()

In [0]:
# `s` is already a covariance matrix. Calling DataFrame.corr() on it would
# compute correlations between the *columns of that matrix*, which is not the
# asset correlation matrix. cov_to_corr performs the actual
# covariance -> correlation conversion.
corr = risk_models.cov_to_corr(s)
# Render the matrix as a colour-graded table; axis=None applies one colour
# scale to the whole table, values are shown with two decimals.
corr.style.background_gradient(cmap='coolwarm', axis = None).format(precision=2)

In [0]:
# Heat map of the in-sample asset correlation matrix.
# The matrix is derived from the covariance `s` with cov_to_corr; calling
# s.corr() would instead correlate the columns of the covariance matrix.
corr_matrix = risk_models.cov_to_corr(s)
# One tick per asset, labelled with its ticker.
tick_labels = corr_matrix.columns
tick_positions = range(len(tick_labels))

f = plt.figure(figsize=(19, 15))
plt.matshow(corr_matrix, fignum=f.number)
plt.xticks(tick_positions, tick_labels, fontsize=14, rotation=90)
plt.yticks(tick_positions, tick_labels, fontsize=14)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)
plt.title('Correlation Matrix', fontsize=16);

In [0]:
# Expected returns per ticker for the in-sample data.
# This is the same call that produced `mean`; it is repeated here only to
# display the values as the cell output.

exp_return = expected_returns.mean_historical_return(df)
exp_return

In [0]:
# Sharpe ratio: optimise the in-sample portfolio for the maximum Sharpe ratio.

ef = EfficientFrontier(mean, s)

# Author's stated intent: avoid allocating funds to stocks whose weight is
# zero and keep only the relevant ones.
# NOTE(review): an extra objective is combined with max_sharpe() here; check
# the PyPortfolioOpt documentation on whether added objectives behave as
# expected with max_sharpe and whether L2_reg matches the stated intent.
ef.add_objective(objective_functions.L2_reg, gamma=0.1)

# Raw optimiser weights, keyed by ticker.
w = ef.max_sharpe()

# Cleaned weights as returned by the optimiser, plus the same values as a
# plain NumPy vector (in the mapping's iteration order) for the backtest.
cleaned_weights = ef.clean_weights()
cleaned_weights_ = np.array(list(cleaned_weights.values()))

print(cleaned_weights)

In [0]:
# Print the performance summary of the optimised in-sample portfolio.
ef.portfolio_performance(verbose=True)

In [0]:
# Turn the in-sample weights into share counts for a portfolio worth
# portfolio_value, priced at the latest prices found in `df`.
# Note: the raw weights `w` are used here, not `cleaned_weights`.
latest_prices = get_latest_prices(df)
da = DiscreteAllocation(w, latest_prices, total_portfolio_value=portfolio_value)
allocation, leftover = da.lp_portfolio()
# Printed labels are in Portuguese: "number of shares allocated" / "left over".
print('Qtd ações alocadas: ',allocation)
print('Sobrou: R$',leftover)

## Efficient Frontier 2

In [0]:
# Expected return per ticker, estimated from the out-of-sample price history.
mean_2 = expected_returns.mean_historical_return(df_out)

# Ledoit-Wolf shrinkage estimate of the covariance matrix of the same prices.
s_2 = CovarianceShrinkage(df_out).ledoit_wolf()

In [0]:
# `s_2` is already a covariance matrix. Calling DataFrame.corr() on it would
# compute correlations between the *columns of that matrix*, which is not the
# asset correlation matrix. cov_to_corr performs the actual
# covariance -> correlation conversion.
corr_2 = risk_models.cov_to_corr(s_2)
# Render the matrix as a colour-graded table; axis=None applies one colour
# scale to the whole table, values are shown with two decimals.
corr_2.style.background_gradient(cmap='coolwarm', axis = None).format(precision=2)

In [0]:
# Heat map of the out-of-sample asset correlation matrix.
# The matrix is derived from the covariance `s_2` with cov_to_corr; calling
# s_2.corr() would instead correlate the columns of the covariance matrix.
corr_matrix_2 = risk_models.cov_to_corr(s_2)
# One tick per asset. The labels come from the out-of-sample matrix itself
# (not from the in-sample `s`), so they always match the plotted data.
tick_labels_2 = corr_matrix_2.columns
tick_positions_2 = range(len(tick_labels_2))

f_2 = plt.figure(figsize=(19, 15))
plt.matshow(corr_matrix_2, fignum=f_2.number)
plt.xticks(tick_positions_2, tick_labels_2, fontsize=14, rotation=90)
plt.yticks(tick_positions_2, tick_labels_2, fontsize=14)
cb_2 = plt.colorbar()
cb_2.ax.tick_params(labelsize=14)
plt.title('Correlation Matrix: Out of Sample', fontsize=16);

In [0]:
# Expected returns per ticker for the out-of-sample data.
# This is the same call that produced `mean_2`; it is repeated here only to
# display the values as the cell output.
exp_return_2 = expected_returns.mean_historical_return(df_out)
exp_return_2

In [0]:
# Sharpe ratio: optimise the out-of-sample portfolio for the maximum Sharpe ratio.

ef_2 = EfficientFrontier(mean_2, s_2)

# Author's stated intent: avoid allocating funds to stocks whose weight is
# zero and keep only the relevant ones.
# NOTE(review): an extra objective is combined with max_sharpe() here; check
# the PyPortfolioOpt documentation on whether added objectives behave as
# expected with max_sharpe and whether L2_reg matches the stated intent.
ef_2.add_objective(objective_functions.L2_reg, gamma=0.1)

# Raw optimiser weights, keyed by ticker.
w_2 = ef_2.max_sharpe()

# Cleaned weights as returned by the optimiser, plus the same values as a
# plain NumPy vector (in the mapping's iteration order).
cleaned_weights_2 = ef_2.clean_weights()
cleaned_weights_2_ = np.array(list(cleaned_weights_2.values()))

print(cleaned_weights_2)

In [0]:
# Print the performance summary of the optimised out-of-sample portfolio.
ef_2.portfolio_performance(verbose=True)

In [0]:
# Turn the out-of-sample weights into share counts for a portfolio worth
# portfolio_value, priced at the latest prices found in `df_out`.
# Note: the raw weights `w_2` are used here, not `cleaned_weights_2`.
latest_prices_2 = get_latest_prices(df_out)
da_2 = DiscreteAllocation(w_2, latest_prices_2, total_portfolio_value=portfolio_value)
allocation_2, leftover_2 = da_2.lp_portfolio()
# Printed labels are in Portuguese: "number of shares allocated" / "left over".
print('Qtd ações alocadas: ',allocation_2)
print('Sobrou: R$',leftover_2)

# Backtest

We will first backtest the strategy using the cleaned weights from the first dataset (Start 2021-11-22, End 2022-05-20). This will result in an overfit, because the cleaned weights were calculated using the same dataset. Then we will move to the out-of-sample dataset (same stocks, different period) (Start 2022-05-23, End 2022-11-22) and reproduce the same steps.

## Backtest 1

Using the first data sample (overfitting)

In [0]:
# Order-size matrix with the same shape as `df`: NaN everywhere except the
# first row, which holds the in-sample cleaned weights.
# (Presumably NaN means "no order" to vectorbt, i.e. buy once and hold -
# confirm against the vectorbt documentation.)
opt_weights = np.full_like(df, np.nan)
opt_weights[0,:] = cleaned_weights_
print(opt_weights.shape)


In [0]:
# Vectorbt backtest on the in-sample prices.
# `opt_weights` is passed as the order size and interpreted through
# size_type='targetpercent'; all columns are grouped into one portfolio that
# shares its cash, and the data frequency is declared as daily.

pyopt_pf = vbt.Portfolio.from_orders(
    close = df,
    size = opt_weights,
    size_type = 'targetpercent',
    group_by = True,
    cash_sharing = True,
    freq = 'd'
)

In [0]:
# Summary statistics of the in-sample (overfit) backtest.
pyopt_pf.stats()

In [0]:
# Main portfolio plot for the in-sample backtest.
pyopt_pf.plot(width=800, height=400, title='Overfit Backtest: Cumulative Returns').show();

In [0]:
# Underwater (drawdown) plot for the in-sample backtest.
pyopt_pf.plot_underwater(width=800, height=400, title='Overfit Backtest: Drawdown').show();

In [0]:
# Drawdown plot limited to top_n=3 drawdowns of the in-sample backtest.
pyopt_pf.drawdowns.plot(top_n=3, width=800, height=400, title='Overfit Backtest: Drawdown').show()

## Backtest 2

Using out of sample data with cleaned weights from the first dataset (to prevent overfitting)

In [0]:
# Order-size matrix with the same shape as `df_out`: NaN everywhere except
# the first row, which holds the weights optimised on the in-sample data.
# (Presumably NaN means "no order" to vectorbt, i.e. buy once and hold -
# confirm against the vectorbt documentation.)
opt_weights = np.full_like(df_out, np.nan)
# Align the in-sample weights to df_out's columns by ticker name rather than
# by position, so a differing column order or column set between the two
# downloads cannot assign a weight to the wrong stock. Tickers without an
# in-sample weight get 0.0.
opt_weights[0,:] = pd.Series(cleaned_weights).reindex(df_out.columns, fill_value=0.0).to_numpy()
print(opt_weights.shape)

In [0]:
# Vectorbt backtest on the out-of-sample prices.
# `opt_weights` is passed as the order size and interpreted through
# size_type='targetpercent'; all columns are grouped into one portfolio that
# shares its cash, and the data frequency is declared as daily.
# Note: this rebinds `pyopt_pf`, replacing the in-sample backtest object.

pyopt_pf = vbt.Portfolio.from_orders(
    close = df_out,
    size = opt_weights,
    size_type = 'targetpercent',
    group_by = True,
    cash_sharing = True,
    freq = 'd'
)

In [0]:
# Display the portfolio object itself as the cell output.
pyopt_pf

In [0]:
# Summary statistics of the out-of-sample backtest.
pyopt_pf.stats()

In [0]:
# Main portfolio plot for the out-of-sample backtest.
pyopt_pf.plot(width=800, height=400, title='Out of Sample Backtest: Cumulative Returns').show();

In [0]:
# Underwater (drawdown) plot for the out-of-sample backtest.
pyopt_pf.plot_underwater(width=800, height=400, title='Out of Sample Backtest: Drawdown').show();

In [0]:
# Drawdown plot limited to top_n=3 drawdowns of the out-of-sample backtest.
pyopt_pf.drawdowns.plot(top_n=3, width=800, height=400, title='Out of Sample Backtest: Drawdown').show()