In [1]:
import numpy as np
import pandas as pd
from pylab import plt, mpl
# The bare 'seaborn' style name is deprecated in recent Matplotlib releases
# (renamed to 'seaborn-v0_8') and is not available in newer ones, so pick
# whichever name this installation actually provides.
_style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_style_name)
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
pd.set_option('display.precision', 4)
np.set_printoptions(suppress=True, precision=4)
# Read the data into a DataFrame: the first CSV column becomes the index
# (parsed as dates) and every row containing a missing value is dropped
url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'
data = pd.read_csv(url, index_col=0, parse_dates=True).dropna()

  plt.style.use('seaborn')


In [2]:
# Derive the log returns from the price data; rows containing NaN
# (such as the first row, which has no previous price) are discarded
rets = np.log(data / data.shift(1)).dropna()

In [3]:
from numpy.core.fromnumeric import std
# Number of lags (in trading days) used to build the feature columns
lags = 7

def add_lags(data, ric, lags):
    """Build a frame holding one column of `data` plus its lagged copies.

    Parameters
    ----------
    data : object indexable by `ric` that yields a pandas Series
        (a DataFrame in this notebook).
    ric : column label to select and lag.
    lags : int
        Number of lagged columns to create (lag 1 up to and including `lags`).

    Returns
    -------
    (df, cols) : tuple
        `df` contains the selected column followed by 'lag_1' ... 'lag_<lags>',
        with every row that has a missing value removed; `cols` is the list of
        lag column names in creation order.
    """
    # Names of the lag columns, in order lag_1 ... lag_<lags>
    cols = ['lag_{}'.format(k) for k in range(1, lags + 1)]
    df = pd.DataFrame(data[ric])
    for k, name in enumerate(cols, start=1):
        # Shift the selected column down by k rows to obtain the k-th lag
        df[name] = df[ric].shift(k)
    # Drop all incomplete rows (the leading rows lack some of the lags)
    return df.dropna(), cols


# Per-symbol frames: raw log-return target column plus normalized lag features
dfs = {}
for sym in data.columns:
    # Lag the log-return data for this symbol
    df, cols = add_lags(rets, sym, lags)
    # Apply Gaussian normalization (z-scores) to the feature columns only;
    # the target column df[sym] keeps its raw log returns.
    # The spread is named `sigma` so it does not shadow the imported `std`.
    # NOTE(review): mean/std are computed over the full sample, so a later
    # train/test split scales the test rows with statistics that include
    # them -- confirm this is acceptable for the analysis.
    mu, sigma = df[cols].mean(), df[cols].std()
    df[cols] = (df[cols] - mu) / sigma
    dfs[sym] = df

# Show a sample of the lagged data (sym is the last symbol processed above)
dfs[sym].head()

Unnamed: 0_level_0,GLD,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-14,0.0044,0.957,-2.1692,1.3386,0.4959,-0.6434,1.6613,-0.1028
2010-01-15,-0.0105,0.4379,0.9571,-2.1689,1.3388,0.4966,-0.6436,1.6614
2010-01-19,0.0059,-1.0842,0.4385,0.9562,-2.169,1.3395,0.4958,-0.6435
2010-01-20,-0.0234,0.5967,-1.0823,0.4378,0.9564,-2.1686,1.3383,0.4958
2010-01-21,-0.0145,-2.4045,0.5971,-1.0825,0.4379,0.9571,-2.168,1.3384


In [4]:
from statsmodels.tsa.stattools import adfuller
# Test the stationarity of the time series: run the augmented Dickey-Fuller
# test on the first lag column of the most recently processed symbol
lag_1_series = dfs[sym]['lag_1']
adfuller(lag_1_series)

(-51.56825150582553,
 0.0,
 0,
 2507,
 {'1%': -3.4329610922579095,
  '5%': -2.8626935681060375,
  '10%': -2.567384088736619},
 7017.165474260225)

In [5]:
# Show the correlation between the target column and the lagged feature columns
sample_frame = dfs[sym]
sample_frame.corr()

Unnamed: 0,GLD,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
GLD,1.0,-0.0297,0.0003,0.012635,-0.0026,-0.0059392,0.0099,-0.0013
lag_1,-0.0297,1.0,-0.0305,0.00081418,0.0128,-0.0028765,-0.0053,0.0098
lag_2,0.0003,-0.0305,1.0,-0.031617,0.0003,0.013234,-0.0043,-0.0052
lag_3,0.0126,0.0008,-0.0316,1.0,-0.0313,-6.8542e-06,0.0141,-0.0044
lag_4,-0.0026,0.0128,0.0003,-0.031329,1.0,-0.031761,0.0002,0.0141
lag_5,-0.0059,-0.0029,0.0132,-6.8542e-06,-0.0318,1.0,-0.0323,0.0002
lag_6,0.0099,-0.0053,-0.0043,0.014115,0.0002,-0.032289,1.0,-0.0324
lag_7,-0.0013,0.0098,-0.0052,-0.0043869,0.0141,0.00021707,-0.0324,1.0


In [6]:
from sklearn.metrics import accuracy_score

for sym in data.columns:
    df = dfs[sym]
    # Split the frame into the lagged features and the target returns
    X, y = df[cols], df[sym]
    # Regression step: least-squares coefficients on the lag columns
    # (no intercept column is added)
    reg = np.linalg.lstsq(X, y, rcond=-1)[0]
    # Prediction step: fitted values on the same rows used for the fit
    pred = np.dot(X, reg)
    # Prediction accuracy: share of rows where the sign of the prediction
    # equals the sign of the realized return
    acc = accuracy_score(np.sign(y), np.sign(pred))
    print(f'OLS | {sym:10s} | acc={acc:.4f}')

OLS | AAPL.O     | acc=0.5056
OLS | MSFT.O     | acc=0.5088
OLS | INTC.O     | acc=0.5040
OLS | AMZN.O     | acc=0.5048
OLS | GS.N       | acc=0.5080
OLS | SPY        | acc=0.5080
OLS | .SPX       | acc=0.5167
OLS | .VIX       | acc=0.5291
OLS | EUR=       | acc=0.4984
OLS | XAU=       | acc=0.5207
OLS | GDX        | acc=0.5307
OLS | GLD        | acc=0.5072


In [7]:
from sklearn.neural_network import MLPRegressor

# Hyperparameters shared by the network fitted for each symbol
mlp_params = dict(hidden_layer_sizes=[512],
                  random_state=100,
                  max_iter=1000,
                  early_stopping=True,
                  validation_fraction=0.15,
                  shuffle=False)

for sym in data.columns:
    df = dfs[sym]
    X, y = df[cols], df[sym]
    # Instantiate the model
    model = MLPRegressor(**mlp_params)
    # Fit the model on the full sample
    model.fit(X, y)
    # Prediction step (in-sample: same rows as the fit)
    pred = model.predict(X)
    # Accuracy of the predicted direction versus the realized direction
    acc = accuracy_score(np.sign(y), np.sign(pred))
    print(f'MLP | {sym:10s} | acc={acc:.4f}')


MLP | AAPL.O     | acc=0.6005
MLP | MSFT.O     | acc=0.5853
MLP | INTC.O     | acc=0.5766
MLP | AMZN.O     | acc=0.5510
MLP | GS.N       | acc=0.6527
MLP | SPY        | acc=0.5419
MLP | .SPX       | acc=0.5399
MLP | .VIX       | acc=0.6579
MLP | EUR=       | acc=0.5642
MLP | XAU=       | acc=0.5522
MLP | GDX        | acc=0.6029
MLP | GLD        | acc=0.5259


In [9]:
import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential

# Seed both NumPy's and TensorFlow's random number generators with the same value
seed_value = 100
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

In [10]:
# Model creation function
def create_model(problem='regression'):
    """Build and compile a dense network with one hidden layer of 512 ReLU units.

    The input width is taken from the module-level `cols` list at call time.

    Parameters
    ----------
    problem : str
        'regression' yields a single linear output unit trained with MSE loss;
        any other value yields a single sigmoid output unit trained with
        binary cross-entropy loss.

    Returns
    -------
    The compiled Sequential model (Adam optimizer in both cases).
    """
    is_regression = problem == 'regression'
    # Output activation and loss are the only parts that depend on `problem`
    output_activation = 'linear' if is_regression else 'sigmoid'
    loss_name = 'mse' if is_regression else 'binary_crossentropy'
    model = Sequential([
        Dense(512, input_dim=len(cols), activation='relu'),
        Dense(1, activation=output_activation),
    ])
    model.compile(loss=loss_name, optimizer='adam')
    return model

In [12]:
%%time
for sym in data.columns:
    df = dfs[sym]
    X, y = df[cols], df[sym]
    # Instantiate a fresh model for this symbol
    model = create_model()
    # Fit the model on the full sample
    model.fit(X, y, epochs=25, verbose=False)
    # Prediction step (in-sample: same rows as the fit)
    pred = model.predict(X)
    # Accuracy of the predicted direction versus the realized direction
    acc = accuracy_score(np.sign(y), np.sign(pred))
    print(f'DNN | {sym:10s} | acc={acc:.4f}')


DNN | AAPL.O     | acc=0.6192
DNN | MSFT.O     | acc=0.6216
DNN | INTC.O     | acc=0.5634
DNN | AMZN.O     | acc=0.5809
DNN | GS.N       | acc=0.6240
DNN | SPY        | acc=0.5734
DNN | .SPX       | acc=0.5821
DNN | .VIX       | acc=0.6033
DNN | EUR=       | acc=0.5781
DNN | XAU=       | acc=0.5726
DNN | GDX        | acc=0.6288
DNN | GLD        | acc=0.5781
CPU times: user 1min 1s, sys: 2.6 s, total: 1min 4s
Wall time: 1min 9s


In [19]:
# Share of each symbol's rows (in chronological order) used for training;
# the remaining rows form the hold-out test set
train_fraction = 0.8

for sym in data.columns:
    df = dfs[sym]
    # Derive the cut-off row from this symbol's own frame instead of from a
    # loop variable left over by an earlier cell. `split` stays bound at
    # module level after the loop, so later cells that read it keep working.
    split = int(len(df) * train_fraction)
    # Create the training subset (first rows) and fit the OLS coefficients
    train = df.iloc[:split]
    reg = np.linalg.lstsq(train[cols], train[sym], rcond=-1)[0]
    # Create the test subset (remaining rows) and predict on it
    test = df.iloc[split:]
    pred = np.dot(test[cols], reg)
    # Out-of-sample accuracy of the predicted direction
    acc = accuracy_score(np.sign(test[sym]), np.sign(pred))
    print(f'OLS | {sym:10s} | acc={acc:.4f}')

OLS | AAPL.O     | acc=0.5219
OLS | MSFT.O     | acc=0.4960
OLS | INTC.O     | acc=0.5418
OLS | AMZN.O     | acc=0.4841
OLS | GS.N       | acc=0.4980
OLS | SPY        | acc=0.5020
OLS | .SPX       | acc=0.5120
OLS | .VIX       | acc=0.5458
OLS | EUR=       | acc=0.4482
OLS | XAU=       | acc=0.5299
OLS | GDX        | acc=0.5159
OLS | GLD        | acc=0.5100


In [20]:
for sym in data.columns:
    df = dfs[sym]
    # Chronological split: rows before `split` train, the rest test
    train, test = df.iloc[:split], df.iloc[split:]
    # Instantiate the model and fit it on the training rows only
    model = MLPRegressor(hidden_layer_sizes=[512], random_state=100,
                         max_iter=1000, early_stopping=True,
                         validation_fraction=0.15, shuffle=False)
    model.fit(train[cols], train[sym])
    # Predict on the held-out rows and score the predicted direction
    pred = model.predict(test[cols])
    acc = accuracy_score(np.sign(test[sym]), np.sign(pred))
    print(f'MLP | {sym:10s} | acc={acc:.4f}')

MLP | AAPL.O     | acc=0.4920
MLP | MSFT.O     | acc=0.5279
MLP | INTC.O     | acc=0.5279
MLP | AMZN.O     | acc=0.4641
MLP | GS.N       | acc=0.5040
MLP | SPY        | acc=0.5259
MLP | .SPX       | acc=0.5478
MLP | .VIX       | acc=0.5279
MLP | EUR=       | acc=0.4980
MLP | XAU=       | acc=0.5239
MLP | GDX        | acc=0.4880
MLP | GLD        | acc=0.5000


In [21]:
%%time
for sym in data.columns:
    df = dfs[sym]
    # Chronological split: rows before `split` train, the rest test
    train, test = df.iloc[:split], df.iloc[split:]
    # Build a fresh network and fit it on the training rows only
    model = create_model()
    model.fit(train[cols], train[sym], epochs=50, verbose=False)
    # Predict on the held-out rows and score the predicted direction
    pred = model.predict(test[cols])
    acc = accuracy_score(np.sign(test[sym]), np.sign(pred))
    print(f'DNN | {sym:10s} | acc={acc:.4f}')

DNN | AAPL.O     | acc=0.5518
DNN | MSFT.O     | acc=0.5080
DNN | INTC.O     | acc=0.4661
DNN | AMZN.O     | acc=0.5239
DNN | GS.N       | acc=0.5339
DNN | SPY        | acc=0.5359
DNN | .SPX       | acc=0.5478
DNN | .VIX       | acc=0.5120
DNN | EUR=       | acc=0.5159
DNN | XAU=       | acc=0.5060
DNN | GDX        | acc=0.4920
DNN | GLD        | acc=0.4781
CPU times: user 1min 28s, sys: 4.14 s, total: 1min 32s
Wall time: 1min 40s
