# Transformación de Datos para Algoritmo de Differential Evolution

Transformación para un periodo anual

In [192]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import os

In [3]:
def carga_datos(path):
    df = pd.read_parquet(path)
    df.set_index('Date', inplace=True)
    return df

In [6]:
def annualize_rets(r, periods_per_year):
    "anualiza retorns desde subperiodos de año"
    compounded_growth = np.prod(1+r)
    n_periods = r.shape[0]
    return compounded_growth**(periods_per_year / n_periods)-1

In [4]:
datos1 = carga_datos('./Data/train/opt-20130101-20141231.gzip')

In [16]:
datos1.tail()

Stock,A,AAL,AAP,AAPL,ABC,ABMD,ABT,ACN,ADBE,ADI,...,XEL,XLNX,XOM,XRAY,XRX,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-12-24,-0.005801,0.024298,-0.005956,-0.004709,0.001969,0.003392,-0.000437,-0.001094,0.00484,0.000883,...,0.02258,0.004099,-0.008563,-0.004444,0.000711,0.001033,0.001513,0.007594,0.003202,-0.000351
2014-12-26,0.006078,0.010305,-0.001435,0.017677,-0.000982,0.00468,0.003282,-0.000657,-0.000937,-0.007582,...,0.009661,-0.003856,-0.006078,0.001488,0.004261,0.004902,0.004808,0.000526,-0.002936,0.001403
2014-12-29,-0.001208,0.017129,0.007625,-0.000702,0.001311,-0.004141,-0.005453,-0.008108,-0.007232,-0.003554,...,0.018316,-0.00296,-0.001502,-0.002043,0.004475,-0.002311,0.005742,-0.005868,-0.00064,0.005602
2014-12-30,0.000968,0.010785,-0.007381,-0.012203,-0.00382,-0.003118,0.001974,-0.003646,-0.014164,-0.009272,...,-0.023356,-0.005481,-0.000537,-0.007258,-0.009194,-0.011065,-0.003806,0.009427,0.003459,-0.001741
2014-12-31,-0.010394,0.003931,-0.004687,-0.019019,-0.01227,-0.007821,-0.014664,-0.009757,-0.0052,-0.00072,...,-0.012645,-0.00597,-0.006128,-0.001312,-0.010706,-0.009368,-0.005868,-0.010038,-0.011619,-0.005581


In [99]:
nombres = datos1.columns

In [100]:
nombres

Index(['A', 'AAL', 'AAP', 'AAPL', 'ABC', 'ABMD', 'ABT', 'ACN', 'ADBE', 'ADI',
       ...
       'XEL', 'XLNX', 'XOM', 'XRAY', 'XRX', 'XYL', 'YUM', 'ZBH', 'ZBRA',
       'ZION'],
      dtype='object', name='Stock', length=465)

In [7]:
retornos = annualize_rets(datos1, 253)

In [148]:
retornos.head(3)

Stock
A      0.193463
AAL    1.003660
AAP    0.489474
dtype: float64

In [11]:
vol_anual = datos1.cov()*np.sqrt(253)

In [147]:
vol_anual.head(3)

Stock,A,AAL,AAP,AAPL,ABC,ABMD,ABT,ACN,ADBE,ADI,...,XEL,XLNX,XOM,XRAY,XRX,XYL,YUM,ZBH,ZBRA,ZION
Stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A,0.003088,0.001517,0.001019,0.000527,0.000713,0.000927,0.000983,0.001065,0.001147,0.00131,...,0.000393,0.001288,0.000767,0.001042,0.001389,0.001438,0.00109,0.001081,0.001275,0.001329
AAL,0.001517,0.008833,0.00116,0.000584,0.000971,0.001653,0.000783,0.001098,0.001685,0.001289,...,0.000433,0.001096,0.000469,0.001126,0.00127,0.001581,0.001325,0.001043,0.001584,0.001349
AAP,0.001019,0.00116,0.003829,0.00042,0.000607,0.000997,0.000937,0.000699,0.001068,0.0008,...,0.000485,0.000875,0.000479,0.00067,0.000995,0.000844,0.000823,0.000714,0.00076,0.00087


In [18]:
# Generacion de ventanas de rendimeinto de S&P
fechas_final = ['2014-12-31', '2015-03-31', '2015-06-30', '2015-09-30',
                '2015-12-31', '2016-03-31', '2016-06-30', '2016-09-30',
                '2016-12-31', '2017-03-31', '2017-06-30', '2017-09-30',
                '2017-12-31', '2018-03-31', '2018-06-30', '2018-09-30',
                '2018-12-31', '2019-03-31', '2019-06-30', '2019-09-30',
                '2019-12-31', '2020-03-31', '2020-06-30']

In [19]:
fechas_inicial = ['2013-01-01', '2013-04-01', '2013-07-01', '2013-10-01',
                  '2014-01-01', '2014-04-01', '2014-07-01', '2014-10-01',
                  '2015-01-01', '2015-04-01', '2015-07-01', '2015-10-01',
                  '2016-01-01', '2016-04-01', '2016-07-01', '2016-10-01',
                  '2017-01-01', '2017-04-01', '2017-07-01', '2017-10-01',
                  '2018-01-01', '2018-04-01', '2018-07-01']



In [36]:
datos_syp = pd.read_csv('./Data_DE/GSPC_DE.csv')

In [37]:
datos_syp.head(2)

Unnamed: 0,Date,S&P Return
0,2013-01-02,0.025403
1,2013-01-03,-0.002086


In [38]:
fechas_inicial[0]

'2013-01-01'

In [39]:
fechas_final[0]

'2014-12-31'

In [43]:
data_per1 = datos_syp[(datos_syp['Date']>=fechas_inicial[0]) & (datos_syp['Date']<=fechas_final[0])]

In [44]:
data_per1

Unnamed: 0,Date,S&P Return
0,2013-01-02,0.025403
1,2013-01-03,-0.002086
2,2013-01-04,0.004865
3,2013-01-07,-0.003123
4,2013-01-08,-0.003242
...,...,...
499,2014-12-24,-0.000139
500,2014-12-26,0.003310
501,2014-12-29,0.000862
502,2014-12-30,-0.004889


In [41]:
datos_syp.dtypes

Date           object
S&P Return    float64
dtype: object

In [49]:
(BETA, ALPHA, R, P, SE) = stats.linregress(data_per1['S&P Return'],datos1.A)

In [50]:
BETA

1.2266924602333462

In [51]:
def calculoBeta(syp, accion):
    Beta_temp,_,_,_,_ = stats.linregress(syp,accion)
    return Beta_temp

In [52]:
calculoBeta(data_per1['S&P Return'],datos1.A)

1.2266924602333462

In [142]:
def beta_df(syp, matrix):
    salida = pd.DataFrame()
    betas = list()
    nombres = matrix.columns
    for columna in matrix:
        temp0 = matrix[columna].to_numpy()
        temp = calculoBeta(syp, temp0)
        betas.append(temp)
    salida = pd.DataFrame(list(zip(nombres, betas))).transpose()
    salida.columns = salida.iloc[0]
    salida.drop([0], inplace=True)
    return salida
        
        

In [143]:
betas_list = beta_df( data_per1['S&P Return'], datos1)

In [144]:
betas_list

Unnamed: 0,A,AAL,AAP,AAPL,ABC,ABMD,ABT,ACN,ADBE,ADI,...,XEL,XLNX,XOM,XRAY,XRX,XYL,YUM,ZBH,ZBRA,ZION
1,1.22669,1.35067,0.94306,0.730989,0.754346,1.10738,0.947134,1.02088,1.22362,1.17498,...,0.610174,1.1202,0.887627,0.934686,1.34806,1.23377,0.973174,1.0095,0.986499,1.18828


In [157]:
retornos = pd.DataFrame(retornos)
retorno_np = retornos.to_numpy().transpose()
print (retorno_np.shape)
vol_anual_np = vol_anual.to_numpy()
print (vol_anual_np.shape)
betas_list_np = betas_list.to_numpy()
print (betas_list_np.shape)

final_df = np.vstack((retorno_np, vol_anual_np, betas_list_np))

(1, 465)
(465, 465)
(1, 465)


In [160]:
final_df.shape

(467, 465)

In [161]:
final_df = pd.DataFrame(final_df, columns=datos1.columns)

In [165]:
name = fechas_inicial[0]+"-"+fechas_final[0]+".csv"
final_df.to_csv('./Data_DE/'+name , index=False)

### para todos

In [190]:
def crear_csv(fechas_inicial, fechas_final, path_data):
    datos = carga_datos(path_data)
    retornos_anual = annualize_rets(datos, 253)
    vol_mat = datos.cov()*np.sqrt(253)
    datos_syp = pd.read_csv('./Data_DE/GSPC_DE.csv')
    data_per = datos_syp[(datos_syp['Date']>=fechas_inicial)  & (datos_syp['Date']<=fechas_final)]
    beta = beta_df(data_per['S&P Return'], datos)
    
    retornos = pd.DataFrame(retornos_anual)
    retorno_np = retornos.to_numpy().transpose()
    #print (retorno_np.shape)
    vol_anual_np = vol_mat.to_numpy()
    #print (vol_anual_np.shape)
    betas_list_np = beta.to_numpy()
    #print (betas_list_np.shape)
    final_df = np.vstack((retorno_np, vol_anual_np, betas_list_np))
    final_df = pd.DataFrame(final_df, columns=datos1.columns)
    name = fechas_inicial+"-"+fechas_final+".csv"
    final_df.to_csv('./Data_DE/'+name , index=False)
    return name

In [191]:
crear_csv(fechas_inicial[0], fechas_final[0], './Data/train/opt-20130101-20141231.gzip' )

'2013-01-01-2014-12-31.csv'

In [201]:
list_files_train = os.listdir('./Data/train/')


In [206]:
final_list = list()
for file in list_files_train:
    if file.endswith('gzip'):
        final_list.append(file)

In [207]:
final_list

['opt-20130101-20141231.gzip',
 'opt-20130401-20150331.gzip',
 'opt-20130701-20150630.gzip',
 'opt-20131001-20150930.gzip',
 'opt-20140101-20151231.gzip',
 'opt-20140401-20160331.gzip',
 'opt-20140701-20160630.gzip',
 'opt-20141001-20160930.gzip',
 'opt-20150101-20161231.gzip',
 'opt-20150401-20170331.gzip',
 'opt-20150701-20170630.gzip',
 'opt-20151001-20170930.gzip',
 'opt-20160101-20171231.gzip',
 'opt-20160401-20180331.gzip',
 'opt-20160701-20180630.gzip',
 'opt-20161001-20180930.gzip',
 'opt-20170101-20181231.gzip',
 'opt-20170401-20190331.gzip',
 'opt-20170701-20190630.gzip',
 'opt-20171001-20190930.gzip',
 'opt-20180101-20191231.gzip',
 'opt-20180401-20200331.gzip',
 'opt-20180701-20200630.gzip',
 'opt-20181001-20200930.gzip']

In [210]:
for i in range(len(fechas_final)):
    path_aux = os.path.join('./Data/train/',final_list[i])
    crear_csv(fechas_inicial[i], fechas_final[i], path_aux )