#Import

In [None]:
import os
import math
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

#Functions

In [None]:
def processing_dataset(path=''):
    list79 = pd.read_csv(path + 'list.txt',sep='\t', names=['Name'])
    time79 = pd.read_csv(path+'time.txt', sep='\t', header=None)
    mret79 = pd.read_csv(path+'MRet.txt', sep='\t', names=['Expected Return'])

    ret79 = pd.read_csv(path + 'Ret_time.txt', sep='\t',
                        names=list79['Name'].values)

    df = pd.concat([list79, mret79], axis=1)
    df.set_index(['Name'], inplace=True)

    ret_col_names = {i: 'RET' + str(v) for i, v in enumerate(time79.values)}
    ret79_trans = ret79.transpose()
    ret79_trans.rename(columns=ret_col_names, inplace=True)

    df_returns = ret79_trans

    means = [np.mean(df_returns.values[i, :]) for i in range(df_returns.values.shape[0])]

    values = df_returns.values.copy()
    positive_rate = [np.sum(values[i, :]>0)/values.shape[1] for i in range(values.shape[0])]

    stds = df_returns.std(axis=1).values

    df['Means'] = means
    ER = df['Means'].values
    # stds = df['STD'].values
    sharp_ratio = ER/stds
    values = df_returns.values.copy()
    down = [ abs(min(values[i, :])) for i in range(values.shape[0]) if \
            np.any(values[i, :] < 0) or 0]

    df['PR'] = positive_rate
    df['SR'] = sharp_ratio

    df['STD'] = stds
    df['ML'] = down
    df.drop(labels=['Expected Return'], 
            inplace=True, axis=1)
    # print('Min STD=' + str(np.min(df['STD'].values)))
    # for col_name in df.columns:
    #     df[col_name] = MinMaxScaler().fit_transform(df[col_name].values.reshape((-1, 1)))

    return df

In [None]:
def calculate_similarities(df, epsilon=0.1):
    values = df.values
    # Membership functions 
    members = []
    for i in range(values.shape[0]):
        rows = []
        for j in range(values.shape[1]):
            m_i_j = (values[i][j] - \
                     np.min(values[i][:]))/(np.max(values[i][:]) - \
                                            np.min(values[i][:]) + epsilon)
            rows.append(m_i_j)
        members.append(rows)
    members = np.asarray(members)
    # Non membership functions 
    non_members = []
    for i in range(values.shape[0]):
        rows = []
        for j in range(values.shape[1]):
            non_m_i_j = (max(values[i][:]) - values[i][j])/(max(values[i][:]) \
                                                    - np.min(values[i][:])\
                                                    + epsilon)
            rows.append(non_m_i_j)
        non_members.append(rows)
    
    non_members = np.asarray(non_members)
    
    a = 1/(3*values.shape[0])
    entropy = []
    for j in range(values.shape[1]):
        e = 0.
        for i in range(values.shape[0]):
            e += (4*math.sqrt(members[i, j]*non_members[i, j]) + 1 - members[i, j] - non_members[i, j] + 2*math.sqrt((1-members[i, j])*(1-non_members[i, j])))
        entropy.append(e*a)
    entropy = np.asarray(entropy)
    
    w_ = []
    n = values.shape[1]
    sum_of_entropy = np.sum(entropy)
    for e_j in entropy:
        w_.append((1-e_j)/(n - sum_of_entropy))
    
    w_ = np.asarray(w_)
    # print("Entropy:")
    # print(entropy)
    # print("Membership:")
    # print(members)
    # print("Non Membership:")
    # print(non_members)

    # print('Weight=') 
    # print(w_)

    members_perfect = np.zeros(shape=values.shape)
    members_perfect[:, :3] = 1
    non_members_perfect = 1 - members_perfect
    pi_ = []
    for i in range(members.shape[0]):
        row = []
        for j in range(members.shape[1]):
            p_i_j = 1 - members[i, j] - non_members[i, j]
            row.append(p_i_j)
        pi_.append(row)
    pi_ = np.asarray(pi_)
    pi_perfect = []
    for i in range(members_perfect.shape[0]):
        row = []
        for j in range(members_perfect.shape[1]):
            p_i_j = 1 - members_perfect[i, j] - non_members_perfect[i, j]
            row.append(p_i_j)
        pi_perfect.append(row)
    pi_perfect = np.asarray(pi_perfect)

    a = 1/3
    S_s = []
    for i in range(values.shape[0]):
        sum = 0.
        for j in range(values.shape[1]):
            sum += w_[j]*(2*math.sqrt(members[i, j]*members_perfect[i, j]) + 2*math.sqrt(non_members[i, j]*non_members_perfect[i, j]) \
                + 2*math.sqrt((1 - members[i, j])*(1-members_perfect[i, j])) + 2*math.sqrt((1-non_members[i, j])*(1-non_members_perfect[i, j])))
            sum *= a
        S_s.append(sum)

    df['Similarity'] = S_s
    df.sort_values(by=['Similarity', 'Name'], ascending=False, inplace=True)
    return df

In [None]:
epsilon=0.000001

#FSTE 100

In [None]:
fste_dir = 'drive/MyDrive/Dataset/IFSData/FSTE100/'
df = calculate_similarities(processing_dataset(fste_dir), epsilon=epsilon)
df["Similarity Ranking"] = df["Similarity"].rank(ascending=False)
df["SR Ranking"] = df["SR"].rank(ascending=False)
df['Similarity'] = df['Similarity']*10e2
df.sort_values(["Similarity", 'Name'], inplace = True, ascending=False)

In [None]:
# df['Similarity'] = df['Similarity']*10e6
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SSE.L,0.00322,0.56818,0.15438,0.02088,0.05741,180.33044,1.0,6.0
BATS.L,0.0043,0.53788,0.18018,0.02387,0.07236,179.74047,2.0,3.0
TSCO.L,0.00368,0.55682,0.1487,0.02472,0.06891,178.96875,3.0,11.0
BG.L,0.00595,0.59848,0.18085,0.03292,0.0889,178.25076,4.0,2.0
DGE.L,0.00196,0.5303,0.10078,0.0195,0.06116,177.92841,5.0,29.0
SCTN.L,0.00348,0.5303,0.1266,0.02748,0.07007,177.75803,6.0,18.0
IMT.L,0.00329,0.54545,0.14059,0.02337,0.08078,177.67859,7.0,13.0
IPR.L,0.00652,0.57955,0.18728,0.03479,0.10196,177.19817,8.0,1.0
JMAT.L,0.00368,0.54924,0.12876,0.02857,0.07906,177.12129,9.0,17.0
HSBA.L,0.00217,0.51515,0.10834,0.02004,0.07504,176.89598,10.0,25.0


In [None]:
df.sort_values(["SR", 'Name'], inplace = True, ascending=False)
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
IPR.L,0.00652,0.57955,0.18728,0.03479,0.10196,177.19817,8.0,1.0
BG.L,0.00595,0.59848,0.18085,0.03292,0.0889,178.25076,4.0,2.0
BATS.L,0.0043,0.53788,0.18018,0.02387,0.07236,179.74047,2.0,3.0
SAB.L,0.00558,0.57197,0.16948,0.0329,0.14407,173.0014,20.0,4.0
BLT.L,0.00687,0.5947,0.15725,0.04367,0.15551,171.43674,28.0,5.0
SSE.L,0.00322,0.56818,0.15438,0.02088,0.05741,180.33044,1.0,6.0
TLW.L,0.00768,0.56818,0.15317,0.05014,0.17037,169.1057,37.0,7.0
CPW.L,0.00651,0.54545,0.15071,0.04321,0.09986,175.33875,12.0,8.0
BA.L,0.00542,0.60227,0.15003,0.03611,0.13822,173.11366,19.0,9.0
ANTO.L,0.01515,0.60606,0.14941,0.1014,0.18713,165.71333,52.0,10.0


In [None]:
df.round(5).to_csv('drive/MyDrive/Dataset/IFSData/FSTE100/results_fste100.csv')

#NASDQ

In [None]:
nasdaq = 'drive/MyDrive/Dataset/IFSData/NASDAQ/'
df = calculate_similarities(processing_dataset(nasdaq), epsilon=epsilon)
df["Similarity Ranking"] = df["Similarity"].rank(ascending=False)
df["SR Ranking"] = df["SR"].rank(ascending=False)
df['Similarity'] = df['Similarity']*10e2
df.sort_values(["Similarity", 'Name'], inplace = True, ascending=False)

In [None]:
# df['Similarity'] = df['Similarity']*10e6
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HCBK,0.00454,0.54924,0.1686,0.02694,0.08545,171.00318,1.0,23.0
SIAL,0.00404,0.56818,0.16714,0.02417,0.08696,170.8939,2.0,25.0
UMBF,0.00321,0.54167,0.12211,0.02629,0.06201,170.66426,3.0,141.0
KTII,0.00801,0.54545,0.20695,0.03873,0.10649,170.51224,4.0,4.0
PTNR,0.00812,0.53409,0.19839,0.04095,0.10718,169.89223,5.0,6.0
NRGY,0.00336,0.55303,0.12335,0.02727,0.07235,169.72177,6.0,136.0
NTRS,0.0033,0.55682,0.11446,0.02883,0.06894,169.44506,7.0,171.0
TAXI,0.00472,0.53409,0.12703,0.03719,0.0745,169.33184,8.0,121.0
BOKF,0.00213,0.51515,0.09429,0.02258,0.06134,169.04731,9.0,298.0
TROW,0.00531,0.56439,0.14562,0.03644,0.09136,168.91783,10.0,67.0


In [None]:
df.sort_values(["SR", 'Name'], inplace=True, ascending=False)

In [None]:
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMOV,0.01014,0.57197,0.22763,0.04454,0.15862,166.32549,38.0,1.0
MICC,0.01659,0.60606,0.22666,0.0732,0.26082,155.63782,386.0,2.0
RIMM,0.01523,0.57197,0.21928,0.06945,0.16938,164.15792,83.0,3.0
KTII,0.00801,0.54545,0.20695,0.03873,0.10649,170.51224,4.0,4.0
AAPL,0.01129,0.60985,0.20191,0.05592,0.21603,159.70515,216.0,5.0
PTNR,0.00812,0.53409,0.19839,0.04095,0.10718,169.89223,5.0,6.0
ISRG,0.01383,0.55682,0.19569,0.07067,0.16991,162.56124,119.0,7.0
HANS,0.01606,0.53409,0.19497,0.08236,0.34224,139.20903,1088.0,8.0
HUBG,0.01182,0.54924,0.1946,0.06076,0.14197,165.63203,48.0,9.0
CETV,0.0105,0.56061,0.19366,0.05421,0.15147,164.95943,64.0,10.0


In [None]:
df.round(5).to_csv('drive/MyDrive/Dataset/IFSData/NASDAQ/results_nasdaq.csv')

#MIBTEL_295

In [None]:
mibtel = 'drive/MyDrive/Dataset/IFSData/MIBTEL_295/'
df = calculate_similarities(processing_dataset(mibtel), epsilon=epsilon)
df["Similarity Ranking"] = df["Similarity"].rank(ascending=False)
df["SR Ranking"] = df["SR"].rank(ascending=False)
df['Similarity'] = df['Similarity']*10e2
df.sort_values(["Similarity", 'Name'], inplace = True, ascending=False)

In [None]:
# df['Similarity'] = df['Similarity']*10e6
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ISPR.MI,0.00481,0.57955,0.16535,0.02908,0.0722,199.95435,1.0,7.0
ISP.MI,0.00519,0.54167,0.16787,0.03093,0.07166,199.93424,2.0,5.0
ACE.MI,0.00559,0.54545,0.17863,0.03132,0.07808,199.69471,3.0,3.0
CB.MI,0.00338,0.50379,0.13446,0.02512,0.06543,199.0553,4.0,24.0
ENEL.MI,0.00288,0.56439,0.14417,0.01999,0.07806,198.5745,5.0,14.0
IMA.MI,0.00197,0.51136,0.07982,0.02471,0.0555,197.04235,6.0,77.0
ENI.MI,0.00283,0.54545,0.11902,0.02379,0.07882,196.91118,7.0,32.0
A2A.MI,0.00403,0.45076,0.13535,0.02975,0.07696,196.90091,8.0,21.0
UBI.MI,0.00302,0.54545,0.09387,0.03216,0.06349,196.83271,9.0,57.0
IP.MI,0.00315,0.50758,0.10802,0.02918,0.07784,195.81195,10.0,39.0


In [None]:
df.sort_values(["SR", 'Name'], inplace = True, ascending=False)
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TFI.MI,0.01143,0.56061,0.21296,0.05366,0.21858,183.15183,92.0,1.0
DANR.MI,0.00966,0.54924,0.20414,0.04731,0.15866,190.49829,48.0,2.0
ACE.MI,0.00559,0.54545,0.17863,0.03132,0.07808,199.69471,3.0,3.0
DAN.MI,0.00903,0.54167,0.17642,0.0512,0.13525,191.83193,38.0,4.0
ISP.MI,0.00519,0.54167,0.16787,0.03093,0.07166,199.93424,2.0,5.0
TEN.MI,0.00811,0.56818,0.16768,0.04837,0.16319,188.63801,60.0,6.0
ISPR.MI,0.00481,0.57955,0.16535,0.02908,0.0722,199.95435,1.0,7.0
SIR.MI,0.00544,0.4697,0.16148,0.0337,0.12516,191.50238,43.0,8.0
FSAR.MI,0.00535,0.58712,0.16147,0.03312,0.12487,193.83051,25.0,9.0
MIR.MI,0.00432,0.5303,0.16138,0.02675,0.11085,194.92761,16.0,10.0


In [None]:
df.round(8).to_csv('drive/MyDrive/Dataset/IFSData/MIBTEL_295/results_mibtel.csv')

#EUROSTOXX_50

In [None]:
eurostoxx = 'drive/MyDrive/Dataset/IFSData/EUROSTOXX_50/'
df = calculate_similarities(processing_dataset(eurostoxx), epsilon=epsilon)
df["Similarity Ranking"] = df["Similarity"].rank(ascending=False)
df["SR Ranking"] = df["SR"].rank(ascending=False)
df['Similarity'] = df['Similarity']*10e2
df.sort_values(["Similarity", 'Name'], inplace = True, ascending=False)

In [None]:
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AABA.AS,0.00496,0.54545,0.17656,0.02809,0.06982,203.64622,1.0,2.0
ISP.MI,0.00514,0.54167,0.1664,0.0309,0.07166,202.96673,2.0,4.0
ELE.MC,0.00527,0.58333,0.20556,0.02564,0.08971,202.76123,3.0,1.0
ENEL.MI,0.00288,0.56439,0.14417,0.01999,0.07806,202.31502,4.0,7.0
ENI.MI,0.00283,0.54545,0.11888,0.02382,0.07928,200.96115,5.0,14.0
BBVA.MC,0.0032,0.55682,0.11526,0.02777,0.07739,200.95898,6.0,15.0
SAN.MC,0.00598,0.58712,0.17497,0.03416,0.09712,200.81799,7.0,3.0
VIV.PA,0.0033,0.54545,0.10656,0.03098,0.07575,200.51113,8.0,18.0
TEF.MC,0.00334,0.57197,0.12676,0.02634,0.09137,200.10007,9.0,11.0
SZE.PA,0.00519,0.56818,0.15196,0.03418,0.10219,199.30218,10.0,6.0


In [None]:
df.sort_values(["SR", 'Name'], inplace = True, ascending=False)
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ELE.MC,0.00527,0.58333,0.20556,0.02564,0.08971,202.76123,3.0,1.0
AABA.AS,0.00496,0.54545,0.17656,0.02809,0.06982,203.64622,1.0,2.0
SAN.MC,0.00598,0.58712,0.17497,0.03416,0.09712,200.81799,7.0,3.0
ISP.MI,0.00514,0.54167,0.1664,0.0309,0.07166,202.96673,2.0,4.0
RWE.DE,0.00545,0.57955,0.16359,0.03331,0.12513,197.38495,19.0,5.0
SZE.PA,0.00519,0.56818,0.15196,0.03418,0.10219,199.30218,10.0,6.0
ENEL.MI,0.00288,0.56439,0.14417,0.01999,0.07806,202.31502,4.0,7.0
BAY.DE,0.0059,0.56818,0.13758,0.04285,0.16827,190.8725,36.0,8.0
IBE.MC,0.01156,0.48106,0.1275,0.09066,0.06538,197.88707,15.0,9.0
BAS.DE,0.00413,0.55682,0.1271,0.03247,0.09709,198.95134,13.0,10.0


In [None]:
df.round(5).to_csv('drive/MyDrive/Dataset/IFSData/EUROSTOXX_50/results_eurostoxx.csv')

#SP_500

In [None]:
sp500_dir = 'drive/MyDrive/Dataset/IFSData/SP_500/'
df = calculate_similarities(processing_dataset(sp500_dir), epsilon=epsilon)
df["Similarity Ranking"] = df["Similarity"].rank(ascending=False)
df["SR Ranking"] = df["SR"].rank(ascending=False)
df['Similarity'] = df['Similarity']*10e2
df.sort_values(["Similarity", 'Name'], inplace = True, ascending=False)

In [None]:
# df['Similarity'] = df['Similarity']*10e6
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MCD,0.00597,0.56818,0.20631,0.02894,0.06601,169.76733,1.0,6.0
K,0.00274,0.55682,0.1544,0.01777,0.05658,169.1384,2.0,39.0
BCR,0.00468,0.58712,0.17812,0.02629,0.0672,168.83401,3.0,13.0
XEL,0.00307,0.55682,0.14451,0.02126,0.0571,168.54611,4.0,63.0
EIX,0.00553,0.625,0.21313,0.02594,0.09352,168.29846,5.0,4.0
PG,0.00242,0.57197,0.13526,0.0179,0.05926,168.20865,6.0,84.0
PCG,0.0043,0.57576,0.16827,0.02553,0.07183,168.20062,7.0,22.0
PCP,0.00835,0.5947,0.22568,0.03701,0.09413,168.14915,8.0,2.0
DUK,0.00416,0.55303,0.14888,0.02792,0.06493,167.8105,9.0,51.0
RTN,0.00399,0.61364,0.16315,0.02447,0.0782,167.6867,10.0,29.0


In [None]:
df.sort_values(["SR", 'Name'], inplace = True, ascending=False)
df.round(5).head(10)

Unnamed: 0_level_0,Means,PR,SR,STD,ML,Similarity,Similarity Ranking,SR Ranking
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MON,0.01035,0.60985,0.23961,0.04321,0.13159,165.84942,43.0,1.0
PCP,0.00835,0.5947,0.22568,0.03701,0.09413,168.14915,8.0,2.0
RRC,0.01097,0.59091,0.21462,0.0511,0.14929,163.25929,94.0,3.0
EIX,0.00553,0.625,0.21313,0.02594,0.09352,168.29846,5.0,4.0
AYE,0.00828,0.56439,0.20756,0.03988,0.17172,161.45951,147.0,5.0
MCD,0.00597,0.56818,0.20631,0.02894,0.06601,169.76733,1.0,6.0
AAPL,0.01129,0.60985,0.20191,0.05592,0.21603,157.9324,239.0,7.0
CME,0.00933,0.57576,0.19767,0.04719,0.16851,161.19502,155.0,8.0
AMT,0.00778,0.56818,0.19285,0.04035,0.10461,166.0345,38.0,9.0
TIE,0.01291,0.55682,0.18151,0.07112,0.18907,157.3583,260.0,10.0


In [None]:
df.round(5).to_csv('drive/MyDrive/Dataset/IFSData/SP_500/results_sp500.csv')