In [1]:
""" This notebook 
(1) takes optimal portfolios from the file log_opt_portf_NYSE_1.txt, which correspond to n_experiment=30 runs of the GDSEG algorithm 
(2) drops small weights and normalizes the remaining ones
(3) constructs the table containing optimal weights, total wealth and annual return for each portfolio
(4) constructs the table containing minimal and maximal values of each weight in these portfolios (Table 2 of the paper)
The file log_opt_portf_NYSE_1.txt should be constructed by NYSE_1_log_GDSEG.ipynb beforehand
"""
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Load the NYSE_1 dataset from NYSE_1.csv into the DataFrame `stocks`,
# print its (rows, columns) shape and preview the first rows
stocks=pd.read_csv('NYSE_1.csv')
print((len(stocks.index),len(stocks.columns)))
stocks.head(n=5)

(5651, 36)


Unnamed: 0,ahp,alcoa,amerb,arco,coke,comme,dow,dupont,espey,exxon,...,merck,mmm,mobil,morris,pandg,pills,schlum,sears,sherw,tex
0,1.01515,1.02765,1.04183,1.02083,1.00637,1.04938,1.00847,1.01983,1.05426,0.99751,...,1.03148,1.03377,1.01018,1.01495,1.00775,1.00526,1.01176,1.00578,0.99697,0.99752
1,1.01493,1.04036,0.98905,0.9949,1.00475,0.95294,1.0084,1.00833,1.04412,1.005,...,1.00898,1.00251,1.01259,1.0,1.00192,1.0,1.01938,1.00958,0.99088,1.00248
2,1.0,0.97629,0.97786,0.99744,0.98583,0.98765,0.99722,0.99449,0.97183,1.0,...,0.98043,0.9599,0.99751,0.97218,0.98656,0.98429,0.97338,1.0,1.02761,0.99752
3,1.02451,1.00662,1.02642,1.00257,1.01917,1.0,0.99443,1.00693,1.0,1.0,...,1.01089,1.03655,1.01247,0.99663,1.00778,1.01596,1.0,1.0019,1.00299,1.02233
4,1.031,0.98465,1.00368,1.00513,1.00313,1.05,1.02801,1.00413,1.05797,1.01741,...,1.01077,0.99496,0.99507,0.98649,1.01158,0.99738,1.01563,1.01515,1.0119,1.00971


In [3]:
# N: number of rows of the dataset, d: number of columns of the dataset
# r: (N,d) array holding the values of `stocks` (one row per period, one column per stock)
# Note: no pre-allocation is needed, to_numpy() already returns a new array object
N,d=stocks.shape
r=stocks.to_numpy()

In [4]:
def refine(w,tol=1/10**3):
    """ Keep only the weights exceeding tol and rescale them so that they sum to one.

    w: one-dimensional array of portfolio weights
    tol: threshold; every entry with w[i]<=tol is discarded
    Returns (opt_num, w): the list of positions of the kept weights
    and the array of kept weights divided by their sum
    """
    opt_num=[pos for pos,weight in enumerate(w) if weight>tol]
    kept=w[opt_num]
    return opt_num, kept/np.sum(kept)

In [5]:
# Take optimal portfolio weights from the file log_opt_portf_NYSE_1.txt
# Each non-empty line holds the d whitespace-separated weights of one portfolio.
# The file is read in a single pass inside a `with` block, so it is closed even if parsing fails.
# n_experiments: number of portfolios found in the file
# opt_portf: (n_experiments,d) array, row s contains the weights of portfolio s
rows=[]
with open('log_opt_portf_NYSE_1.txt','r') as f:
    for line_no,line in enumerate(f,start=1):
        values=[float(x) for x in line.split()]
        if not values:
            # blank line (e.g. a trailing newline at the end of the file): nothing to read
            continue
        if len(values)!=d:
            # a short row would otherwise be silently broadcast over all d columns
            raise ValueError('line %d of log_opt_portf_NYSE_1.txt has %d weights, expected %d'
                             % (line_no,len(values),d))
        rows.append(values)
n_experiments=len(rows)
# reshape keeps the (n_experiments,d) shape even when the file contains no portfolios
opt_portf=np.array(rows,dtype=float).reshape(n_experiments,d)

In [6]:
# wd[s]: weights of portfolio s that remain after dropping the small ones (renormalized by refine)
# opt_num_d[s]: column numbers of the stocks whose weights were kept in portfolio s
# X[s]: total wealth of the optimal portfolio with number s, starting from wealth 1
wd=dict()
opt_num_d=dict()
X=np.ones(n_experiments)
for s in range(n_experiments):
    opt_num_d[s], wd[s] = refine(opt_portf[s,:])
    opt_num, w = opt_num_d[s], wd[s]
    wealth=X[s]
    for t in range(N):
        # multiply by the weighted sum of row t of r over the kept stocks
        wealth=wealth*np.dot(w,r[t,opt_num])
    X[s]=wealth

In [7]:
# Table_A contains optimal weights, total wealth and annual return for each optimal portfolio
# table_0: numbers of the kept stocks, table_1: their weights (both labelled by stock name),
# table_2: total wealth, table_3: wealth raised to the power 252/N
# (252 is presumably the number of trading days per year - confirm against the paper)
#
# One header row labels the columns of all portfolios, so every portfolio must keep exactly
# the same stocks; otherwise weights would be displayed under wrong stock names.
# The header is derived from opt_num_d itself instead of variables left over from earlier loops.
ref_num=list(opt_num_d[0]) if n_experiments>0 else []
mismatched=[s for s in range(n_experiments) if list(opt_num_d[s])!=ref_num]
if mismatched:
    raise ValueError('portfolios %s keep a different set of stocks than portfolio 0; '
                     'they cannot share one table header' % mismatched)
stock_names=stocks.columns[ref_num]
table_0=pd.DataFrame(data=[opt_num_d[s] for s in range(n_experiments)],columns=stock_names)
table_1=pd.DataFrame(data=[wd[s] for s in range(n_experiments)],columns=stock_names)
table_2=pd.DataFrame(data=[X[s] for s in range(n_experiments)],columns=['Wealth'])
table_3=pd.DataFrame(data=[X[s]**(252/N) for s in range(n_experiments)],columns=['Annual return'])
Table_A=pd.concat([table_0,table_1,table_2,table_3],axis=1)
Table_A

Unnamed: 0,comme,espey,iroqu,kinar,meico,comme.1,espey.1,iroqu.1,kinar.1,meico.1,Wealth,Annual return
0,5,8,19,22,25,0.276725,0.19549,0.092524,0.250767,0.184493,250.597008,1.279321
1,5,8,19,22,25,0.276684,0.1954,0.092716,0.250845,0.184355,250.597009,1.279321
2,5,8,19,22,25,0.276749,0.195289,0.092587,0.250676,0.184699,250.597046,1.279321
3,5,8,19,22,25,0.276646,0.195616,0.092657,0.250707,0.184374,250.596963,1.279321
4,5,8,19,22,25,0.276769,0.195522,0.092577,0.250715,0.184417,250.597007,1.279321
5,5,8,19,22,25,0.276661,0.195504,0.09249,0.250636,0.184708,250.596976,1.279321
6,5,8,19,22,25,0.276868,0.195396,0.092606,0.250727,0.184403,250.597037,1.279321
7,5,8,19,22,25,0.276794,0.195406,0.092811,0.250729,0.18426,250.597003,1.279321
8,5,8,19,22,25,0.276667,0.19531,0.092917,0.250583,0.184523,250.597013,1.279321
9,5,8,19,22,25,0.277012,0.195277,0.092641,0.250763,0.184306,250.596995,1.279321


In [8]:
# Table_B contains minimal and maximal values of optimal portfolio weights (Table 2 of the paper)
# table_1 has one column per kept stock, so its column-wise min()/max() are Series indexed
# by stock name. They are turned into one-column frames directly: this avoids integer
# (positional) lookups on a label-indexed Series, which newer pandas versions deprecate.
a1=table_1.min().to_frame('min weight')
a2=table_1.max().to_frame('max weight')
Table_B=pd.concat([a1,a2],axis=1)
Table_B


Unnamed: 0,min weight,max weight
comme,0.276567,0.277018
espey,0.195167,0.195616
iroqu,0.09249,0.092917
kinar,0.250583,0.250845
meico,0.184213,0.184736
