In [5]:
import numpy as np
import pandas as pd
from pylab import plt, mpl
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

# Plot styling. Matplotlib 3.6 renamed the bundled seaborn style sheets to
# 'seaborn-v0_8*' and deprecated the old names, so prefer the new name when
# this Matplotlib provides it and fall back to the legacy one otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
# Show four decimals in pandas displays and in NumPy array printing
# (NumPy additionally suppresses scientific notation).
pd.set_option('display.precision', 4)
np.set_printoptions(suppress=True, precision=4)


# Earlier, price-only version of add_lags (superseded by the definition
# below; kept for reference):
#
# def add_lags(data, ric, lags):
#     cols = []
#     df = pd.DataFrame(data[ric])
#     for lag in range(1, lags + 1):
#         # build the column name
#         col = 'lag_{}'.format(lag)
#         # lag the price data
#         df[col] = df[ric].shift(lag)
#         # append the column name to the list
#         cols.append(col)
#     # drop all incomplete rows
#     df.dropna(inplace=True)
#     return df, cols

def add_lags(data, ric, lags, window=50):
    """Build lagged price/return features for a single instrument.

    Parameters
    ----------
    data : pandas.DataFrame
        Price data; ``data[ric]`` must be a numeric column. It is not modified.
    ric : str
        Name of the column (instrument) to process.
    lags : int
        Number of lags (1..lags) created for every base feature.
    window : int, default 50
        Rolling-window length used for the SMA, min, max, momentum and
        volatility features.

    Returns
    -------
    df : pandas.DataFrame
        Columns ``ric``, ``'r'`` (log return), ``'sma'``, ``'min'``, ``'max'``,
        ``'mom'``, ``'vol'``, ``'d'`` (1 if ``r > 0`` else 0) followed by the
        lagged columns, with all incomplete rows removed.
    cols : list of str
        Names of the lagged columns, ``'<feature>_lag_<k>'``, ordered by
        feature and then by lag.
    """
    # Work on the price Series itself so the log return never depends on the
    # frame happening to hold exactly one column.
    prices = data[ric].dropna()
    df = prices.to_frame(name=ric)
    df['r'] = np.log(prices / prices.shift())
    # simple moving average of the price
    df['sma'] = prices.rolling(window).mean()
    # rolling minimum / maximum of the price
    df['min'] = prices.rolling(window).min()
    df['max'] = prices.rolling(window).max()
    # momentum as the rolling mean of the log returns
    df['mom'] = df['r'].rolling(window).mean()
    # rolling volatility of the log returns
    df['vol'] = df['r'].rolling(window).std()
    # Drop the warm-up rows, then add the direction as a binary column.
    df = df.dropna().assign(d=lambda frame: np.where(frame['r'] > 0, 1, 0))

    features = [ric, 'r', 'd', 'sma', 'min', 'max', 'mom', 'vol']
    # Build every lagged column first and attach them in one concat: inserting
    # columns one at a time fragments the frame for larger `lags`.
    lagged = [
        df[f].shift(lag).rename(f'{f}_lag_{lag}')
        for f in features
        for lag in range(1, lags + 1)
    ]
    cols = [series.name for series in lagged]
    df = pd.concat([df, *lagged], axis=1)
    # The first `lags` rows have incomplete lag columns.
    return df.dropna(), cols


def train_test_model(model, datasets=None, train_frac=0.85):
    """Fit ``model`` per instrument on a sequential split and print test accuracy.

    For every instrument the first ``train_frac`` share of the rows is used
    for fitting and the remaining rows for out-of-sample evaluation. Features
    are standardised with the mean and standard deviation of the training
    rows only. The target is the ``'d'`` column of each frame.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is re-fitted for
        every instrument.
    datasets : mapping, optional
        ``{ric: (df, cols)}`` where ``df`` contains the ``cols`` feature
        columns and ``'d'``. When omitted, the notebook-level ``data`` and
        ``dfs`` objects are used, as in the original implementation.
    train_frac : float, default 0.85
        Share of rows (taken from the start of each frame) used for training.

    Returns
    -------
    None
        One ``OUT-OF-SAMPLE`` line per instrument is printed.
    """
    if datasets is None:
        # Backward-compatible default: rely on the notebook globals.
        datasets = {ric: dfs[ric] for ric in data}
    for ric, (df, cols) in datasets.items():
        split = int(len(df) * train_frac)
        train = df.iloc[:split].copy()
        test = df.iloc[split:].copy()
        # Normalisation statistics come from the training rows only.
        mu, std = train[cols].mean(), train[cols].std()
        # A feature that is constant over the training rows has std == 0;
        # dividing by it would yield NaN and make `fit` fail, so leave such
        # columns centred but unscaled.
        std = std.replace(0, 1)
        train[cols] = (train[cols] - mu) / std
        test[cols] = (test[cols] - mu) / std
        model.fit(train[cols], train['d'])
        pred = model.predict(test[cols])
        acc = accuracy_score(test['d'], pred)
        print(f'OUT-OF-SAMPLE | {ric:7s} | acc={acc:.4f}')


# Location of the CSV file holding the price series used in this notebook.
url = 'http://hilpisch.com/aiif_eikon_id_data.csv'

# The first CSV column becomes the index and is parsed as dates.
data = pd.read_csv(
    url,
    index_col=0,
    parse_dates=True,
)

# Overview of the columns, their non-null counts and dtypes.
data.info()

  plt.style.use('seaborn')


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5529 entries, 2019-03-01 00:00:00 to 2020-01-01 00:00:00
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AAPL.O  3384 non-null   float64
 1   MSFT.O  3378 non-null   float64
 2   INTC.O  3275 non-null   float64
 3   AMZN.O  3381 non-null   float64
 4   GS.N    1686 non-null   float64
 5   SPY     3388 non-null   float64
 6   .SPX    1802 non-null   float64
 7   .VIX    2959 non-null   float64
 8   EUR=    5429 non-null   float64
 9   XAU=    5149 non-null   float64
 10  GDX     3173 non-null   float64
 11  GLD     3351 non-null   float64
dtypes: float64(12)
memory usage: 561.5 KB


In [6]:
data.head()

Unnamed: 0_level_0,AAPL.O,MSFT.O,INTC.O,AMZN.O,GS.N,SPY,.SPX,.VIX,EUR=,XAU=,GDX,GLD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-03-01 00:00:00,173.15,112.09,53.03,1640.4,,278.59,,,1.1373,1314.0663,22.24,124.07
2019-03-01 01:00:00,173.44,112.24,,1642.8,,278.8,,,1.1369,1312.91,22.24,124.08
2019-03-01 02:00:00,,,,,,,,,1.1372,1314.3636,,
2019-03-01 03:00:00,,,,,,,,,1.1367,1313.2452,,
2019-03-01 04:00:00,,,,,,,,,1.1369,1313.1746,,


In [7]:
# MLP classifier with a single hidden layer of 512 units and early stopping
# on a 15% validation fraction.
model_mlp = MLPClassifier(
    hidden_layer_sizes=[512],
    random_state=100,
    max_iter=1000,
    early_stopping=True,
    validation_fraction=0.15,
    shuffle=False,
)

# Number of lags created for every feature.
lags = 5

# Per-instrument feature frames: ric -> (frame, lagged column names).
dfs = {}
for ric in data.columns:
    df, cols = add_lags(data, ric, lags)
    dfs[ric] = (df, cols)

train_test_model(model_mlp)

OUT-OF-SAMPLE | AAPL.O  | acc=0.5420
OUT-OF-SAMPLE | MSFT.O  | acc=0.4930
OUT-OF-SAMPLE | INTC.O  | acc=0.5549
OUT-OF-SAMPLE | AMZN.O  | acc=0.4709
OUT-OF-SAMPLE | GS.N    | acc=0.5184
OUT-OF-SAMPLE | SPY     | acc=0.4860
OUT-OF-SAMPLE | .SPX    | acc=0.5019
OUT-OF-SAMPLE | .VIX    | acc=0.4885
OUT-OF-SAMPLE | EUR=    | acc=0.5130
OUT-OF-SAMPLE | XAU=    | acc=0.4824
OUT-OF-SAMPLE | GDX     | acc=0.4765
OUT-OF-SAMPLE | GLD     | acc=0.5455


In [8]:
from sklearn.ensemble import BaggingClassifier

# Base estimator: an MLP with one hidden layer of 256 units.
base_estimator = MLPClassifier(hidden_layer_sizes=[256],
                               random_state=100,
                               max_iter=1000,
                               early_stopping=True,
                               validation_fraction=0.15,
                               shuffle=False)

bagging_params = dict(
    # number of estimators in the ensemble
    n_estimators=35,
    # maximum fraction of the training samples drawn per estimator
    max_samples=0.25,
    # maximum fraction of the features drawn per estimator
    max_features=0.5,
    # whether samples are drawn with replacement (bootstrapped)
    bootstrap=False,
    # whether features are drawn with replacement (bootstrapped)
    bootstrap_features=True,
    # number of parallel jobs
    n_jobs=8,
    random_state=100,
)

# scikit-learn 1.2 renamed `base_estimator` to `estimator` and 1.4 removed the
# old keyword; try the current name first and fall back for older releases,
# which reject the unknown keyword with a TypeError.
try:
    model_bag = BaggingClassifier(estimator=base_estimator, **bagging_params)
except TypeError:
    model_bag = BaggingClassifier(base_estimator=base_estimator,
                                  **bagging_params)

train_test_model(model_bag)



OUT-OF-SAMPLE | AAPL.O  | acc=0.5660




OUT-OF-SAMPLE | MSFT.O  | acc=0.5551




OUT-OF-SAMPLE | INTC.O  | acc=0.5072




OUT-OF-SAMPLE | AMZN.O  | acc=0.4830




OUT-OF-SAMPLE | GS.N    | acc=0.5020




OUT-OF-SAMPLE | SPY     | acc=0.4680




OUT-OF-SAMPLE | .SPX    | acc=0.4677




OUT-OF-SAMPLE | .VIX    | acc=0.5161




OUT-OF-SAMPLE | EUR=    | acc=0.5242




OUT-OF-SAMPLE | XAU=    | acc=0.5229




OUT-OF-SAMPLE | GDX     | acc=0.5107




OUT-OF-SAMPLE | GLD     | acc=0.5475
