<a href="https://colab.research.google.com/github/Valdi35/Markowitz_python/blob/main/Markowitz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Librairies, Données ...

In [1]:
!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.1.67-py2.py3-none-any.whl (25 kB)
Collecting lxml>=4.5.1
  Downloading lxml-4.6.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 13.3 MB/s 
Installing collected packages: lxml, yfinance
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successfully installed lxml-4.6.4 yfinance-0.1.67


In [2]:
!pip install plotly.express 

Collecting plotly.express
  Downloading plotly_express-0.4.1-py2.py3-none-any.whl (2.9 kB)
Installing collected packages: plotly.express
Successfully installed plotly.express-0.4.1


In [3]:
#Import librairies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import datetime
from datetime import date
import bs4 as bs
import requests
import plotly_express as px

Download data

In [4]:
resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
soup = bs.BeautifulSoup(resp.text, 'lxml')
table = soup.find('table',{'class':'wikitable sortable'})
tickers = []
names = []
Sector = []
for row in table.findAll('tr')[1:]:
  ticker = row.findAll('td')[0].text
  name = row.findAll('td')[1].text
  secteur = row.findAll('td')[3].text
  tickers.append(ticker)
  names.append(name)
  Sector.append(secteur)

In [5]:
tickers = list(map(lambda s: s.strip(), tickers))
names = list(map(lambda s: s.strip(), names))
Sector = list(map(lambda s: s.strip(), Sector))

#Fichier avec toutes les informations
tickerdf = pd.DataFrame(tickers,columns=['ticker'])
namesdf = pd.DataFrame(names,columns=['Nom'])
Sectordf = pd.DataFrame(Sector,columns=['Industry'])

fichier = pd.concat([tickerdf, namesdf, Sectordf], axis=1)

In [6]:
start= datetime.datetime(2021,1,1)
data = yf.download(tickers ,start=start, period='1d')
#On conserve uniquement le prix ajuste
data = data['Adj Close']

[*********************100%***********************]  505 of 505 completed

2 Failed downloads:
- BRK.B: No data found, symbol may be delisted
- BF.B: None


In [7]:
data.drop(["BRK.B","BF.B"], axis=1, inplace=True)

In [8]:
data= pd.DataFrame(data, index=data.index)
data = data.fillna(method='ffill')

# Rendement, variance

In [9]:
AR = np.log(data/data.shift(1))

In [10]:
AR.dropna(inplace=True)

In [11]:
assetReturn = np.mean(AR)
assetVar = np.var(AR)

In [12]:
fig = px.scatter(x=assetVar,y=assetReturn,hover_name=assetReturn.index,width=800, height=600)

fig.update_layout(
    title="Rendements des actifs en fonction de la variance",
    xaxis_title="Variance",
    yaxis_title="Rendements"
)

fig.show()

# Formulation à rendement donné

In [13]:
#Objectifs de rendements totale = 11%
rho = 0.11

#Montant investi dans le portefeuille
S = 1
e = np.ones(len(assetReturn))

#Determiner un poids aleatoire pour chaque actif compris entre 0 et 1
#La somme des poids doit etre egale a 1 --> portefeuille equipondere
w = np.array([[1/len(assetReturn)] * len(assetReturn)])

#Matrice de variance-covariance
assetCov = np.cov(AR.transpose())

In [14]:
#Contraintes :
"""
- Poids >= 0 , pas de vente a decouvert

- Poids * rendements moyens doivent etre superieur ou egale a l'objectifs de rendements

- La somme des poids doit etre egale a la somme investi S
"""

#Algorithme d'optimisation
from scipy.optimize import LinearConstraint, Bounds, minimize
bounds = Bounds(0, 1)
linear_constraints = LinearConstraint([assetReturn,e],[rho,0.9],[np.inf,1.1])

def optimRisk(x,sigma):
  """
  Sigma : Covariance matrix of portfolio assets
  x : initially weighted portfolio 
  """
  return np.dot(np.dot(x,sigma),x.T)

res = minimize(
    optimRisk,
    x0=w.T,
    args=assetCov,
    method='SLSQP',
    constraints=linear_constraints,
    bounds=bounds
)

In [15]:
opt_weight = res.x
opt_weight.shape

(503,)

Calcul de la variance du portefeuille optimale

In [16]:
#Le risque global quand le niveau de rendement exige est de 8%
globalRiskPort = res.fun
globalRiskPort

0.000755892192810678

Visualisation de la repartition des actifs

In [17]:
d = {'return': assetReturn.values, 'variance': assetVar.values, 'weight':opt_weight}
df_1 = pd.DataFrame(data=d, index=assetReturn.index)

In [18]:
fig = px.scatter(df_1, x='variance',y='return',color='weight'
                 ,width=800, height=600)

fig.update_layout(
    title="Rho = 0.11",
    xaxis_title="Variance",
    yaxis_title="Rendements"
)

fig.show()

# Formulation à risque controlé

In [19]:
#On fixe le niveau de risque attendu
sigDiag = np.diag(assetCov)
riskTarget = 0.008
bounds = Bounds(0, 1)
linear_constraints = LinearConstraint([sigDiag,e],[0,S-0.001],[riskTarget,S+0.001])

def optimReturn(x,sigma,r):
  """
  Sigma : Covariance matrix of portfolio assets
  x : initially weighted portfolio 
  """
  return np.dot(np.dot(x,sigma),x.T) - np.dot(w,r)

res1 = minimize(
    optimReturn,
    x0=w.T,
    args=(assetCov,assetReturn),
    constraints=linear_constraints,
    bounds=bounds
)

In [24]:
#La rendement du portefeuille obtenu
returnPort = res.fun

In [21]:
opt_weight_2 = res.x

In [22]:
df_1['weight2'] = opt_weight_2

In [23]:
fig = px.scatter(df_1, x='variance',y='return',color='weight2'
                 ,width=800, height=600)

fig.update_layout(
    title="Sigma : 0.008",
    xaxis_title="Variance",
    yaxis_title="Rendements"
)

fig.show()

# Analyse sur les 10 meilleures actifs

In [26]:
#extract 10 best
top_10 = df_1.nlargest(10,'weight2')

In [27]:
#Returns & variance
assetReturn_10 = top_10['return']
assetVar_10 = top_10['variance']

In [28]:
#Params
cov_mat = np.zeros((10,10))

In [29]:
#Matrice de covariance entre les 10 meilleures actifs
for i in range(len(assetReturn_10)):
  for j in range(len(assetReturn_10)):
    cov_mat[i,j] = np.cov([assetReturn_10.iloc[i],assetReturn_10.iloc[j]])

In [31]:
sigDiag_10 = np.diag(cov_mat)
riskTarget = 0.002
bounds_10 = Bounds(0, 1)
e_10 = np.ones(len(assetReturn_10))
w_10 = np.array([[1/len(assetReturn_10)] * len(assetReturn_10)])

linear_constraints = LinearConstraint([sigDiag_10,e_10],[0,S-0.001],[riskTarget,S+0.001])

def optimReturn(x,sigma,r):
  """
  Sigma : Covariance matrix of portfolio assets
  x : initially weighted portfolio 
  """
  return np.dot(np.dot(x,sigma),x.T) - np.dot(x,r)

res_10 = minimize(
    optimReturn,
    x0=w_10.T,
    args=(cov_mat,assetReturn_10),
    constraints=linear_constraints,
    bounds=bounds_10
)

In [32]:
w_10 = res_10.x

In [35]:
top_10['optimWeight'] = w_10

In [37]:
fig = px.scatter(top_10, x='variance',y='return',color='optimWeight'
                 ,width=800, height=600)

fig.update_layout(
    title="Analyse sur les 10 meilleures actifs",
    xaxis_title="Variance",
    yaxis_title="Rendements"
)

fig.show()

Les limites de l'approche de Markowitz constaté ici : 

- L’optimisation par le critère rendement/risque est très sensible aux variations du rendement 
espéré du portefeuille. En effet, plus l'actif a un rendement moyen élevé sur l'historique choisi, plus sa part augmente dans le portefeuille optimale.

# Formulation pénalisée

La formulation pénalisé a pour but de réduire le nombre de produits actifs qui ne permettent pas d'augmenter le rendement du portefeuille. Le paramètre lambda est le paramètre de régularisation, au lieu de rendre les poids petits comme constaté précédemment. Plus lambda est élevé, plus on néglige la corrélation entre les actifs

In [39]:
bounds = Bounds(0, 1)
linear_constraints = LinearConstraint(e,S-0.001,S+0.001)

def ridgeOptim(x,sigma,r, lbd):
  """
  Sigma : Covariance matrix of portfolio assets
  x : initially weighted portfolio 
  r : Le vecteur de rendement moyen des actifs
  lambda : paramètre de régularisation
  """
  return - (np.dot(w,r) - np.dot(np.dot(x,sigma),x.T) - lbd * (np.dot(np.dot(x,np.eye(len(x))),x.T)))

In [53]:
#Résoudre le problème pour différentes valeurs de lambda
l = np.linspace(0.001,0.01,10)
results = np.zeros((len(assetReturn),10))

for i in l:
  j = 0
  res = minimize(
    ridgeOptim,
    x0=w.T,
    args=(assetCov,assetReturn,i),
    constraints=linear_constraints,
    bounds=bounds)
  results[:,j] = res.x
  j += 1


In [56]:
penalizedOptim = pd.DataFrame(data=results,index=assetReturn.index,
                              columns = [i for i in l])

In [57]:
penalizedOptim.head()

Unnamed: 0,0.001,0.002,0.003,0.004,0.005,0.006,0.007,0.008,0.009,0.010
A,0.002847,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAL,0.000455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAP,0.001905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAPL,0.002624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABBV,0.002982,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
