# Pack de portefeuille

Dans un article publié sur [Medium](https://medium.com/sfu-cspmp/predicting-stable-portfolios-using-machine-learning-f2e27d6dbbec), les auteurs proposent de sélectionner des paquets de titres peu corrélés entre eux.

Nous allons nous en inspirer pour construire des packs de titres peu corrélés. À partir de ces packs, on va générer des portefeuilles (environ 10 000).

Dans un premier temps, nous allons récupérer des packs de titres peu corrélés, puis nous en servir pour générer les portefeuilles.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

import itertools
import random

In [2]:
# Load the per-ticker return table. Besides the "<TICKER> US Equity" columns,
# the CSV carries an unnamed index column and two date columns (Date1, Date2).
data = pd.read_csv(filepath_or_buffer="../data/return.csv")
data.head(n=5)

Unnamed: 0,HES US Equity,NEM US Equity,INTC US Equity,IFF US Equity,MOS US Equity,EXC US Equity,KMB US Equity,JNJ US Equity,BAX US Equity,F US Equity,...,IR US Equity,BKR US Equity,WEC US Equity,OXY US Equity,WMB US Equity,UNP US Equity,WM US Equity,CCL US Equity,Date1,Date2
0,-4.479339,-9.406045,-4.901961,-3.330092,-10.41353,-6.650297,-6.780725,-2.406459,-1.857532,8.4,...,-9.177392,12.21877,-1.786073,-3.700062,-1.13852,-5.320814,-5.205561,5.175134,2009-12-31 00:00:00,2010-01-31 00:00:00
1,1.747707,14.979,6.671663,5.883832,9.216614,-3.945733,2.27311,0.992332,-1.146032,8.302583,...,-1.474259,5.830389,-0.212952,1.927495,3.358925,11.80492,3.026521,8.208021,2010-01-31 00:00:00,2010-02-28 00:00:00
2,6.552083,3.553801,8.572821,13.82579,4.07604,1.177829,4.66159,3.492064,2.732195,7.069847,...,9.276089,-2.253756,2.023539,6.302256,7.761559,8.802137,5.254409,8.120133,2010-02-28 00:00:00,2010-03-31 00:00:00
3,1.598721,10.11192,2.467474,5.076568,-15.84663,-0.502169,-2.576336,-1.380368,-18.86598,3.579952,...,6.051047,6.233988,6.274034,4.873433,2.207792,3.219645,0.726111,7.253087,2010-03-31 00:00:00,2010-04-30 00:00:00
4,-16.28639,-4.029957,-5.550986,-11.15991,-9.626484,-10.34488,-0.914137,-8.501832,-10.56756,-9.907834,...,0.892374,-23.10122,-5.965246,-6.936612,-16.349,-5.146491,-5.353629,-12.88282,2010-04-30 00:00:00,2010-05-31 00:00:00


In [4]:
# Load the index price history from sp500.csv
# (Date, Open, High, Low, Close, Adj Close, Volume).
sp = pd.read_csv(filepath_or_buffer="../data/sp500.csv")
sp.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2009-12-01,1098.890015,1130.380005,1085.890015,1115.099976,1115.099976,89515330000
1,2010-01-01,1116.560059,1150.449951,1071.589966,1073.869995,1073.869995,90947580000
2,2010-02-01,1073.890015,1112.420044,1044.5,1104.48999,1104.48999,84561340000
3,2010-03-01,1105.359985,1180.689941,1105.359985,1169.430054,1169.430054,103683550000
4,2010-04-01,1171.22998,1219.800049,1170.689941,1186.689941,1186.689941,116741910000


In [5]:
# Keep only the asset-return columns. "Date1"/"Date2" are the period bounds
# and "Unnamed: 0" is the row index that read_csv loaded as a regular column;
# leaving the latter in would turn it into a pseudo "ticker" in the
# correlation matrix and in everything that later iterates over `tickers`.
non_asset_columns = {"Unnamed: 0", "Date1", "Date2"}
tickers = [col for col in data.columns if col not in non_asset_columns]
data_tickers = data[tickers]

# Pairwise correlation between the return series (DataFrame.corr defaults).
data_corr = data_tickers.corr()

# Optional visual check of the correlation matrix:
# fig= plt.figure(figsize= (10, 10))
# sns.heatmap(data_corr, vmin=-1, vmax=1, center=0
#             ,cmap=sns.diverging_palette(20, 220, n=200), square=True)
# plt.show()

In [6]:
# data_corr.head()

In [7]:
# Average correlation of each column, computed once and reused below.
mean_corr = data_corr.mean(axis=0).values

# 0.25 shifted by the column mean: a centred correlation below this value
# corresponds to a raw correlation below 0.25.
mean_corr_seuil = 0.25 - mean_corr

# Correlations expressed relative to the mean of their column.
data_corr2 = data_corr - mean_corr
# data_corr2.head()

In [8]:
# data_corr2[np.logical_and(data_corr2 < mean_corr, data_corr2 < mean_corr_seuil)]

In [9]:
# Boolean mask of candidate pairs: True where the centred correlation is
# below both the column mean and the centred 0.25 threshold.
data_corr3 = (data_corr2 < mean_corr) & (data_corr2 < mean_corr_seuil)
# data_corr3.head()

In [10]:
# Same mask as data_corr3, encoded as integers (True -> 1, False -> 0).
data_corr5 = data_corr3.astype(int)
# data_corr5.head()

In [11]:
# Turn the boolean mask into labels. Multiplying a boolean by a string keeps
# the string for True and gives "" for False, so every True cell becomes its
# row label; the remaining empty strings are then replaced by "0".
data_corr4= data_corr3.apply(lambda x: x * data_corr3.index).replace({"":"0"})
# data_corr4
# NOTE: apply() processes one column at a time, so the result must be read
# column-wise: each column lists the row labels flagged for that column.

In [15]:
# Build, for every ticker, the list of tickers it may be combined with,
# then draw random portfolios out of each list.
random.seed(55)  # fixed seed so the sampled portfolios are reproducible

num_max_assets = 7  # a portfolio holds between 2 and 7 tickers
port_per_iter = 200  # portfolios drawn per ticker (fewer if not enough exist)

portfolio_pairing = []
for ticker in tickers:
    # Labels flagged in this ticker's column of data_corr4 ("0" = not flagged).
    possible_pairing = [i for i in data_corr4[ticker].values if i != "0"]
    # Keep a candidate only when the flag holds in both directions; the
    # ticker itself always comes first in its own list.
    stocked = [ticker] + [
        val
        for val in possible_pairing
        if data_corr5.loc[ticker, val] == 1 and data_corr5.loc[val, ticker] == 1
    ]
    portfolio_pairing.append(stocked)

# NOTE(review): combinations are taken over the whole pairing list, so a
# sampled portfolio does not necessarily contain the ticker the list was
# built for, and candidates are only checked against that ticker, not against
# each other. Every combination is also materialised before sampling, which
# grows quickly with the length of the pairing list.
all_portfolio = []
for pairing in portfolio_pairing:
    stock = [
        list(combo)
        for size in range(2, num_max_assets + 1)
        for combo in itertools.combinations(pairing, size)
    ]

    # random.sample raises ValueError when asked for more items than the
    # population holds (e.g. a ticker with no partner), so cap the request.
    port_random = random.sample(stock, min(port_per_iter, len(stock)))

    all_portfolio += port_random

In [17]:
# Preview the first five sampled portfolios.
all_portfolio[0:5]

[['DUK US Equity', 'PPL US Equity', 'MO US Equity'],
 ['EXC US Equity', 'DUK US Equity', 'HSY US Equity', 'WELL US Equity'],
 ['HES US Equity', 'DUK US Equity', 'PPL US Equity', 'HSY US Equity'],
 ['HES US Equity',
  'EXC US Equity',
  'KMB US Equity',
  'DUK US Equity',
  'PPL US Equity',
  'DTE US Equity',
  'WELL US Equity'],
 ['HES US Equity',
  'EXC US Equity',
  'PPL US Equity',
  'DTE US Equity',
  'WELL US Equity']]

In [18]:
# Persist the sampled portfolios: a single "portfolio" column, one row per
# portfolio, written with a header line and without the row index.
df_portfolio = pd.DataFrame(data={"portfolio": all_portfolio})
df_portfolio.to_csv(
    path_or_buf="../data/portfolio_constitution_short_long.csv",
    header=True,
    index=False,
)