In [8]:
""" This notebook 
(1) takes optimal portfolios from the file log_opt_portf_NYSE_2.txt, which correspond to n_experiment=30 runs of the GDSEG algorithm 
(2) drops small weights and normalizes the remaining ones
(3) constructs the table containing optimal weights, total wealth and annual return for each portfolio
(4) constructs the table containing minimal and maximal values of each weight in these portfolios (Table 3 of the paper)
The file log_opt_portf_NYSE_2.txt should be constructed by NYSE_2_log_GDSEG.ipynb beforehand
"""
import pandas as pd
import numpy as np
from scipy import stats

In [9]:
# Load the NYSE_2 dataset from the CSV file in the working directory,
# report its (rows, columns) shape and preview the first five rows
stocks = pd.read_csv('NYSE_2.csv')
print(stocks.shape)
stocks.head(5)

(11178, 19)


Unnamed: 0,ahp,alcoa,amerb,coke,dow,dupont,ford,ge,gm,hp,ibm,inger,jnj,kimbc,merck,mmm,morris,pandg,schlum
0,1.01515,1.02765,1.04183,1.00637,1.00847,1.01983,1.0,1.0,1.01026,1.01935,1.00429,1.01357,0.99683,1.0534,1.03148,1.03377,1.01495,1.00775,1.01176
1,1.01493,1.04036,0.98905,1.00475,1.0084,1.00833,1.00157,1.02187,0.99746,1.01266,0.99573,1.00446,1.00318,1.00461,1.00898,1.00251,1.0,1.00192,1.01938
2,1.0,0.97629,0.97786,0.98583,0.99722,0.99449,0.98116,0.9786,0.98219,0.98125,0.98571,0.99556,0.95873,0.98165,0.98043,0.9599,0.97218,0.98656,0.97338
3,1.02451,1.00662,1.02642,1.01917,0.99443,1.00693,1.0272,1.00795,0.98705,1.00637,1.01522,1.0,1.01325,0.98131,1.01089,1.03655,0.99663,1.00778,1.0
4,1.031,0.98465,1.00368,1.00313,1.02801,1.00413,1.04361,1.00394,1.00525,1.03165,1.02427,1.01563,1.00654,1.02381,1.01077,0.99496,0.98649,1.01158,1.01563


In [10]:
# N: number of rows of the dataset, d: number of columns (one per stock)
# r: N x d array for stock returns, taken directly from the data frame
# (no pre-allocation is needed: to_numpy() already creates the array)
N,d=stocks.shape
r=stocks.to_numpy()

In [11]:
def refine(w,tol=1/10**3):
    """Discard the weights not exceeding ``tol`` and rescale the rest to sum to one.

    Parameters
    ----------
    w : 1-D numpy array of portfolio weights.
    tol : threshold; an entry is kept only if it is strictly greater than ``tol``.

    Returns
    -------
    opt_num : list of the positions (in the input ``w``) of the retained weights.
    w : array of the retained weights divided by their sum.
    """
    opt_num=[]
    for idx in range(w.shape[0]):
        if w[idx]>tol:
            opt_num.append(idx)
    kept=w[opt_num]
    total=np.sum(kept)
    return opt_num, kept/total

In [12]:
# Take optimal portfolio weights from the file log_opt_portf_NYSE_2.txt
# Each non-empty line holds the d whitespace-separated weights of one experiment.
# n_experiments: number of portfolios read
# opt_portf: array of shape (n_experiments, d); row s holds the weights of experiment s
with open('log_opt_portf_NYSE_2.txt','r') as f:
    # The file is read in a single pass; blank lines (e.g. a trailing newline) are skipped
    rows=[[float(x) for x in line.split()] for line in f if line.strip()]
n_experiments=len(rows)
# reshape raises ValueError unless every line carries exactly d weights
opt_portf=np.array(rows,dtype=float).reshape(n_experiments,d)

In [13]:
# wd: dictionary, containing optimal weights after dropping the small weights
# opt_num_d: dictionary, containing the numbers of stocks with large weights
# X[s]: total wealth of the optimal portfolio with number s
# Y[s]: annual standard deviation of the per-period returns of portfolio s
#       (252 periods per year are assumed -- presumably trading days; TODO confirm)
wd={}
opt_num_d={}
X=np.ones(n_experiments)
Y=np.zeros(n_experiments)
for s in range(n_experiments):
    opt_num, w = refine(opt_portf[s,:])
    opt_num_d[s]=opt_num
    wd[s]=w
    # z[t]: return of the portfolio with fixed weights w in period t,
    # computed for all N periods at once instead of looping over t
    z=np.dot(r[:,opt_num],w)
    # Wealth is the product of the per-period portfolio returns (initial wealth 1)
    X[s]=np.prod(z)
    Y[s]=np.std(z)*np.sqrt(252)
print('Annual standard deviation',Y)
# Annual standard deviation of the log-returns of each individual stock
# NOTE(review): the portfolio figures above use raw returns, the stock figures below use
# log-returns; the two are close for returns near 1 but are not the same statistic
for k in range(d):
    print(np.std(np.log(r[:,k]))*np.sqrt(252))

Annual standard deviation [0.23309536 0.23309392 0.23308874 0.23308991 0.23307343 0.23309154
 0.23309585 0.2330972  0.23308381 0.23309987 0.23306694 0.23311011
 0.23309072 0.23309754 0.23309203 0.23308756 0.23308846 0.23307945
 0.23310785 0.23305959 0.23308448 0.23307694 0.23308851 0.23310127
 0.23309975 0.2331001  0.23309458 0.23309441 0.23310855 0.2331101 ]
0.2549095141322391
0.29537351665307476
0.23061558645184016
0.24320327024989338
0.2655855093408204
0.24349769319861328
0.2923389366228102
0.2405178287646629
0.2716151928645118
0.3614240292306868
0.25586431297482615
0.28334478416157144
0.24438426598271562
0.24513412059049966
0.25008087133897866
0.2302558712683709
0.26962358048019486
0.22625562493698154
0.3045337566120207


In [14]:
# Table_A contains optimal weights, total wealth and annual return for each optimal portfolio
# The column labels are taken from the stocks selected in experiment 0. Every experiment
# must select the same stocks, otherwise the index/weight columns would be mislabeled,
# so this is verified explicitly instead of relying on variables left over from a loop.
if n_experiments==0:
    raise ValueError('No optimal portfolios available to build Table_A')
selected=list(opt_num_d[0])
mismatched=[i for i in range(n_experiments) if list(opt_num_d[i])!=selected]
if mismatched:
    raise ValueError('Experiments %s select a different set of stocks than experiment 0' % mismatched)
selected_names=stocks.columns[selected]
# table_0: numbers of the selected stocks, table_1: their normalized weights
table_0=pd.DataFrame(data=[opt_num_d[i] for i in range(n_experiments)],columns=selected_names)
table_1=pd.DataFrame(data=[wd[i] for i in range(n_experiments)],columns=selected_names)
table_2=pd.DataFrame(data=X,columns=['Wealth'])
# X**(252/N): wealth growth factor per 252 periods (annualized gross return)
table_3=pd.DataFrame(data=X**(252/N),columns=['Annual return'])
Table_A=pd.concat([table_0,table_1,table_2,table_3],axis=1)
Table_A

Unnamed: 0,hp,morris,schlum,hp.1,morris.1,schlum.1,Wealth,Annual return
0,9,16,18,0.177511,0.747064,0.075425,4100.830209,1.206284
1,9,16,18,0.177493,0.747063,0.075444,4100.830274,1.206284
2,9,16,18,0.177471,0.747038,0.075491,4100.830373,1.206284
3,9,16,18,0.177616,0.746973,0.07541,4100.829882,1.206284
4,9,16,18,0.177464,0.746935,0.075601,4100.830381,1.206284
5,9,16,18,0.177146,0.74722,0.075634,4100.829628,1.206284
6,9,16,18,0.177399,0.747123,0.075477,4100.830343,1.206284
7,9,16,18,0.177446,0.747109,0.075445,4100.830297,1.206284
8,9,16,18,0.177534,0.746972,0.075494,4100.830257,1.206284
9,9,16,18,0.177559,0.747071,0.07537,4100.829972,1.206284


In [15]:
# Table_B contains minimal and maximal values of optimal portfolio weights (Table 3 of the paper)
# table_1.min() / table_1.max() are Series indexed by the columns of table_1 (the selected
# stocks), so they are converted to one-column frames directly; this avoids integer-position
# lookups on a label-indexed Series and any dependence on loop variables of earlier cells.
a1=table_1.min().to_frame(name='min weight')
a2=table_1.max().to_frame(name='max weight')
Table_B=pd.concat([a1,a2],axis=1)
Table_B


Unnamed: 0,min weight,max weight
hp,0.177146,0.177616
morris,0.746809,0.747236
schlum,0.075345,0.075668
