In [17]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

import yfinance as yf
from yahooquery import Ticker

from sklearn.svm import SVR
from sklearn.svm import LinearSVR
from sklearn.model_selection import train_test_split

%matplotlib inline

from utils import *

import tensorflow as tf

from tensorflow.keras.layers import Dropout, Dense, Flatten, Conv1D, MaxPool1D, LSTM, GRU

from sklearn.metrics import mean_squared_error as mse

import xgboost as xgb
from tensorflow.keras.models import Sequential

from sklearn.preprocessing import MinMaxScaler, RobustScaler

from tensorflow.keras.callbacks import EarlyStopping

In [3]:
class CFG:
    data_path = "../data"
    img_dim1 = 20
    img_dim2 = 10
    seed = 42
   

# adjust the parameters for displayed figures    
plt.rcParams.update({'figure.figsize': (CFG.img_dim1,CFG.img_dim2)})   


def seed_everything(seed: int = 42) -> None:
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    
seed_everything(CFG.seed)

In [4]:
ensemble_predictions_n_errors = pd.DataFrame(
                                        data=None,
                                        index=None,
                                        columns=['1_day', '5_days', '10_days', '1_month', '2_months', '6_months', '1_year']
                                        )

ensemble_predictions_n_errors['Dataset'] = ['IXIC', 'IXIC', 'IXIC', 'DJI', 'DJI', 'DJI', 'SPX', 'SPX', 'SPX']
ensemble_predictions_n_errors['Metric'] = ['RMSE', 'MAE', 'MAPE', 'RMSE', 'MAE', 'MAPE', 'RMSE', 'MAE', 'MAPE']

ensemble_predictions_n_errors.set_index(['Dataset', 'Metric'], inplace=True)

In [5]:
START_DATE = '2017-01-01'
END_DATE = '2023-01-01'

nq = yf.download(tickers = "^IXIC",  
                    start=START_DATE, 
                    end=END_DATE)

dji = yf.download(tickers = "^DJI",  
                    start=START_DATE, 
                    end=END_DATE)

spx = yf.download(tickers = "^SPX",  
                    start=START_DATE, 
                    end=END_DATE)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [6]:
merged_df_dict = {
                  'Date': np.array(nq.index), 
                  'IXIC': np.array(nq.Close.values), 
                  'SPX': np.array(spx.Close.values), 
                  'DJI': np.array(dji.Close.values)
                  }

merged_df_index = nq.index

merged_df = pd.DataFrame(data=merged_df_dict, index=merged_df_index, columns=['Date', 'IXIC', 'SPX', 'DJI'])

In [8]:
early_stop = EarlyStopping(monitor = 'val_loss', min_delta = 0.001, 
                           patience = 5, mode = 'min', verbose = 1,
                           restore_best_weights = True)

In [20]:
def create_lstm(out_dim):    
    
    model=Sequential()
    model.add(LSTM(64, return_sequences = True, input_shape= [None,1]))
    model.add(Dropout(0.2))
    model.add(LSTM(128, return_sequences = True))
    model.add(Dropout(0.2))
    model.add(LSTM(64, return_sequences = True))
    model.add(Dropout(0.2))
    model.add(LSTM(32))
    model.add(Dense(out_dim))
    
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

def create_gru(output_dim):

  model=Sequential()
  model.add(GRU(64, return_sequences = True, input_shape= [None,1]))
  model.add(Dropout(0.2))
  model.add(GRU(128, return_sequences = True))
  model.add(Dropout(0.2))
  model.add(GRU(64, return_sequences = True))
  model.add(Dropout(0.2))
  model.add(GRU(32))
  model.add(Dense(output_dim))
    
  model.compile(loss='mean_squared_error', optimizer='adam')
  return model

def create_cnn(input_size=50, output_dim=1):
  model = Sequential([
    Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(input_size, 1)),
    Conv1D(filters=64, kernel_size=2, activation='relu'),
    MaxPool1D(pool_size=4),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(output_dim),
  ])
  model.compile(loss='mean_squared_error', optimizer='adam')
  return model

def create_xgb():
  model = xgb.XGBRegressor(
          n_estimators=1000,
          eta=0.05
          )

  return model

def make_models(input_size, output_dim):
  xgb = create_xgb()
  lstm = create_lstm(output_dim)
  gru = create_gru(output_dim)
  cnn = create_cnn(input_size, output_dim)

  return xgb, lstm, gru, cnn

In [157]:
def ensemble_pipe(lookback_horizon, save_dir, model_str):

  prediction_dict = {}
  scalers_dict = {}
  for dataset in ['IXIC', 'DJI', 'SPX']:

    df = merged_df[['Date', dataset]].copy()
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

    scaler = MinMaxScaler()
    df = scaler.fit_transform(df)
    
    # 
    for pred_horizon, horizon_string in [[1, '1_day'], [5, '5_days'], [10, '10_days'], [30, '1_month']]:
      
      look_back = lookback_horizon
      look_ahead = pred_horizon

      xdat, ydat = create_dataset(
                                  df,
                                  look_back = look_back,
                                  look_ahead = look_ahead
                                  )

      # [batch size, time steps, dimensionality]
      xtrain, ytrain, xvalid, yvalid = prepare_split(
                                                    xdat,
                                                    ydat,
                                                    cutoff = int(len(xdat) * 0.8),
                                                    timesteps = look_back
                                                    )

      
      
      #####################################################################
      ############################# LSTM ##################################
      lstm = create_lstm(look_ahead)

      lstm.fit(
              xtrain, 
              ytrain, 
              validation_data = (xvalid, yvalid), 
              epochs = 20, 
              batch_size = 32,
              callbacks=[early_stop],
              verbose=False
              )

      y_pred = lstm.predict(xvalid)
      # y_pred = scaler.inverse_transform(y_pred)

      prediction_dict.update({f"{dataset}_lstm_{horizon_string}": np.array(y_pred)})

      print(f"{dataset}_lstm_{horizon_string} DONE")
      ############################# LSTM ##################################
      #####################################################################    

      #####################################################################
      ############################# GRU ###################################
      gru = create_gru(look_ahead)

      gru.fit(
                xtrain, 
                ytrain, 
                validation_data = (xvalid, yvalid), 
                epochs = 20, 
                batch_size = 32,
                callbacks=[early_stop],
                verbose=False
                )

      y_pred = gru.predict(xvalid)
      # y_pred = scaler.inverse_transform(y_pred)

      prediction_dict.update({f"{dataset}_gru_{horizon_string}": np.array(y_pred)})
      print(f"{dataset}_gru_{horizon_string} DONE")
      ############################# GRU ###################################
      #####################################################################
      xtrain, ytrain, xvalid, yvalid = prepare_split(
                                                    xdat,
                                                    ydat,
                                                    cutoff = int(len(xdat) * 0.8),
                                                    timesteps = look_back
                                                    )
      #####################################################################
      #####################################################################
      # CNN and XGB needs this operations in order to work properly
      # It's a shape thingy 
      xtrain.shape = xtrain.shape[0], xtrain.shape[1]
      xvalid.shape = xvalid.shape[0], xvalid.shape[1]
      #####################################################################
      #####################################################################

      #####################################################################
      ############################# CNN ###################################
      cnn = create_cnn(xtrain.shape[1], look_ahead)
      cnn.compile(optimizer="adam", loss="mse", metrics=["acc"])
      
      cnn.fit(x=xtrain, y=ytrain,
              validation_data=(xvalid, yvalid),
              epochs=20, steps_per_epoch=400, 
              validation_steps=10, verbose=False,
              callbacks=[early_stop])
      
      y_pred = cnn.predict(xvalid)
      # y_pred = scaler.inverse_transform(y_pred)
      
      prediction_dict.update({f"{dataset}_cnn_{horizon_string}": np.array(y_pred)})
      print(f"{dataset}_cnn_{horizon_string} DONE")
      ############################# CNN ###################################
      #####################################################################

      #####################################################################
      ############################# XGB ###################################
      xgb = create_xgb()

      xgb.fit(
        xtrain, ytrain,
        eval_set=[(xtrain, ytrain), (xvalid, yvalid)],
        eval_metric='logloss',
        early_stopping_rounds=50,
        verbose=False
      )
      y_pred = xgb.predict(xvalid)

      if pred_horizon == 1:
        y_pred = y_pred.reshape(-1, 1)
        
      # y_pred = scaler.inverse_transform(y_pred.reshape(-1, 1))

      prediction_dict.update({f"{dataset}_xgb_{horizon_string}": np.array(y_pred)})
      print(f"{dataset}_xgb_{horizon_string} DONE")
      ############################# XGB ###################################
      #####################################################################  
       
      prediction_dict.update({f"{dataset}_valid_data_{horizon_string}": np.array(yvalid)})
      # yvalid = scaler.inverse_transform(yvalid)
      # print(f"valid is {yvalid}")
      # print(prediction_list)

    scalers_dict.update({f"{dataset}_scaler": scaler})
  return prediction_dict, scalers_dict

In [158]:
# prediction_dict4 = {}
# prediction_dict4, local_scaler4 = ensemble_pipe(20, '20d', 'ensemble')

In [160]:
prediction_dict_20d = {}
prediction_dict_20d, local_scalers_20d = ensemble_pipe(20, '20d', 'ensemble')

Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
IXIC_lstm_1_day DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_1_day DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
IXIC_cnn_1_day DONE




IXIC_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
IXIC_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
IXIC_gru_5_days DONE
Restoring model weights from the end of the best epoch: 10.
Epoch 15: early stopping
IXIC_cnn_5_days DONE
IXIC_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
IXIC_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_10_days DONE
Restoring model weights from the end of the best epoch: 7.
Epoch 12: early stopping
IXIC_cnn_10_days DONE
IXIC_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 6.
Epoch 11: early stopping
IXIC_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
IXIC_gru_1_month DONE
Restoring model weights from the end of the best epoch: 6.
Epoch 11: early stopping
IXIC_cnn_



DJI_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_gru_5_days DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
DJI_cnn_5_days DONE
DJI_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_gru_10_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
DJI_cnn_10_days DONE
DJI_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
DJI_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
DJI_gru_1_month DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_cnn_1_month DONE
DJI_



SPX_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_gru_5_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
SPX_cnn_5_days DONE
SPX_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_gru_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_cnn_10_days DONE
SPX_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
SPX_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
SPX_gru_1_month DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
SPX_cnn_1_month DONE
SPX

In [173]:
prediction_dict_50d = {}
prediction_dict_50d, local_scalers_50d = ensemble_pipe(50, '50d', 'ensemble')

Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
IXIC_lstm_1_day DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_1_day DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
IXIC_cnn_1_day DONE




IXIC_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
IXIC_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_5_days DONE
Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
IXIC_cnn_5_days DONE
IXIC_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 9.
Epoch 14: early stopping
IXIC_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
IXIC_gru_10_days DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
IXIC_cnn_10_days DONE
IXIC_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 12.
Epoch 17: early stopping
IXIC_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
IXIC_gru_1_month DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
IXIC_cnn_



DJI_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 7.
Epoch 12: early stopping
DJI_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_gru_5_days DONE
Restoring model weights from the end of the best epoch: 6.
Epoch 11: early stopping
DJI_cnn_5_days DONE
DJI_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
DJI_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_gru_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_cnn_10_days DONE
DJI_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
DJI_gru_1_month DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_cnn_1_month DONE
D



SPX_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
SPX_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_gru_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_cnn_5_days DONE
SPX_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_gru_10_days DONE
Restoring model weights from the end of the best epoch: 8.
Epoch 13: early stopping
SPX_cnn_10_days DONE
SPX_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 6.
Epoch 11: early stopping
SPX_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 6.
Epoch 11: early stopping
SPX_gru_1_month DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
SPX_cnn_1_month DONE
S

In [281]:
prediction_dict_80d = {}
prediction_dict_80d, local_scalers_80d = ensemble_pipe(80, '80d', 'ensemble')

Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
IXIC_lstm_1_day DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_1_day DONE
Restoring model weights from the end of the best epoch: 8.
Epoch 13: early stopping
IXIC_cnn_1_day DONE




IXIC_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 10.
Epoch 15: early stopping
IXIC_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_5_days DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
IXIC_cnn_5_days DONE
IXIC_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
IXIC_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
IXIC_gru_10_days DONE
Restoring model weights from the end of the best epoch: 6.
Epoch 11: early stopping
IXIC_cnn_10_days DONE
IXIC_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
IXIC_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
IXIC_gru_1_month DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
IXIC_cnn_1_



DJI_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 7.
Epoch 12: early stopping
DJI_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_gru_5_days DONE
Restoring model weights from the end of the best epoch: 8.
Epoch 13: early stopping
DJI_cnn_5_days DONE
DJI_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
DJI_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_gru_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
DJI_cnn_10_days DONE
DJI_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
DJI_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
DJI_gru_1_month DONE
Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
DJI_cnn_1_month DONE




SPX_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
SPX_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_gru_5_days DONE
Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
SPX_cnn_5_days DONE
SPX_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
SPX_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_gru_10_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
SPX_cnn_10_days DONE
SPX_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
SPX_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 6.
Epoch 11: early stopping
SPX_gru_1_month DONE
Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
SPX_cnn_1_month DONE
S

In [282]:
with open(f"{CFG.data_path}/80d/ensemble/prediction_ypred_yvalid.pkl", 'wb') as f:
  pickle.dump(prediction_dict_80d, f)

with open(f"{CFG.data_path}/80d/ensemble/local_scalers.pkl", 'wb') as f:
  pickle.dump(local_scalers_80d, f)

In [174]:
with open(f"{CFG.data_path}/50d/ensemble/prediction_ypred_yvalid.pkl", 'wb') as f:
  pickle.dump(prediction_dict_50d, f)

with open(f"{CFG.data_path}/50d/ensemble/local_scalers.pkl", 'wb') as f:
  pickle.dump(local_scalers_50d, f)

In [196]:
pred_foo_20d_scalers = 0

with open(f"{CFG.data_path}/20d/ensemble/local_scalers.pkl", 'rb') as f:
    pred_foo_20d_scalers = pickle.load(f)

In [202]:
pred_foo_20d_scalers

{'IXIC_scaler': MinMaxScaler(),
 'DJI_scaler': MinMaxScaler(),
 'SPX_scaler': MinMaxScaler()}

In [176]:
pred_foo_20d_scalers = 0

with open(f"{CFG.data_path}/20d/ensemble/local_scalers.pkl", 'rb') as f:
    pred_foo_20d_scalers = pickle.load(f)

pred_foo_20d = 0

with open(f"{CFG.data_path}/20d/ensemble/prediction_ypred_yvalid.pkl", 'rb') as f:
    pred_foo_20d = pickle.load(f)
    
print(pred_foo_20d)


{'IXIC_lstm_1_day': array([[0.84087676],
       [0.84234184],
       [0.8454307 ],
       [0.84893495],
       [0.85286796],
       [0.8567325 ],
       [0.861588  ],
       [0.86611634],
       [0.8706661 ],
       [0.8750862 ],
       [0.87997806],
       [0.8853309 ],
       [0.89087075],
       [0.8961637 ],
       [0.900784  ],
       [0.9051495 ],
       [0.909094  ],
       [0.9126666 ],
       [0.9161351 ],
       [0.91924804],
       [0.9225394 ],
       [0.92533416],
       [0.9279197 ],
       [0.9302764 ],
       [0.93183357],
       [0.9329977 ],
       [0.93362385],
       [0.9337518 ],
       [0.93309975],
       [0.93172157],
       [0.92984885],
       [0.92762446],
       [0.925441  ],
       [0.923756  ],
       [0.9218056 ],
       [0.9195131 ],
       [0.91731125],
       [0.9149447 ],
       [0.91282415],
       [0.9105964 ],
       [0.9082283 ],
       [0.90635026],
       [0.9046571 ],
       [0.90287995],
       [0.9021768 ],
       [0.9010429 ],
       [0.9009

In [177]:
pred_foo_20d.keys()

dict_keys(['IXIC_lstm_1_day', 'IXIC_gru_1_day', 'IXIC_cnn_1_day', 'IXIC_xgb_1_day', 'IXIC_valid_data_1_day', 'IXIC_lstm_5_days', 'IXIC_gru_5_days', 'IXIC_cnn_5_days', 'IXIC_xgb_5_days', 'IXIC_valid_data_5_days', 'IXIC_lstm_10_days', 'IXIC_gru_10_days', 'IXIC_cnn_10_days', 'IXIC_xgb_10_days', 'IXIC_valid_data_10_days', 'IXIC_lstm_1_month', 'IXIC_gru_1_month', 'IXIC_cnn_1_month', 'IXIC_xgb_1_month', 'IXIC_valid_data_1_month', 'DJI_lstm_1_day', 'DJI_gru_1_day', 'DJI_cnn_1_day', 'DJI_xgb_1_day', 'DJI_valid_data_1_day', 'DJI_lstm_5_days', 'DJI_gru_5_days', 'DJI_cnn_5_days', 'DJI_xgb_5_days', 'DJI_valid_data_5_days', 'DJI_lstm_10_days', 'DJI_gru_10_days', 'DJI_cnn_10_days', 'DJI_xgb_10_days', 'DJI_valid_data_10_days', 'DJI_lstm_1_month', 'DJI_gru_1_month', 'DJI_cnn_1_month', 'DJI_xgb_1_month', 'DJI_valid_data_1_month', 'SPX_lstm_1_day', 'SPX_gru_1_day', 'SPX_cnn_1_day', 'SPX_xgb_1_day', 'SPX_valid_data_1_day', 'SPX_lstm_5_days', 'SPX_gru_5_days', 'SPX_cnn_5_days', 'SPX_xgb_5_days', 'SPX_vali

In [301]:
def take_maximum(dictionary):

  new_arrays = {}
  for dataset in ['IXIC', 'DJI', 'SPX']:
    for days in ['1_day', '5_days', '10_days', '1_month']:
      new_array = np.maximum(np.maximum(np.maximum(dictionary[f"{dataset}_lstm_{days}"], 
                                                  dictionary[f"{dataset}_gru_{days}"]), 
                                                  dictionary[f"{dataset}_cnn_{days}"]), 
                                                  dictionary[f"{dataset}_xgb_{days}"])
      new_arrays.update({f"{dataset}_{days}": new_array})
  return new_arrays

def take_minimum(dictionary):

  new_arrays = {}
  for dataset in ['IXIC', 'DJI', 'SPX']:
    for days in ['1_day', '5_days', '10_days', '1_month']:
      new_array = np.minimum(np.minimum(np.minimum(dictionary[f"{dataset}_lstm_{days}"], 
                                                  dictionary[f"{dataset}_gru_{days}"]), 
                                                  dictionary[f"{dataset}_cnn_{days}"]), 
                                                  dictionary[f"{dataset}_xgb_{days}"])
      new_arrays.update({f"{dataset}_{days}": new_array})
  return new_arrays  

def take_maximum_gru_cnn(dictionary):

  new_arrays = {}
  for dataset in ['IXIC', 'DJI', 'SPX']:
    for days in ['1_day', '5_days', '10_days', '1_month']:
      new_array = np.maximum(dictionary[f"{dataset}_cnn_{days}"], 
                            dictionary[f"{dataset}_gru_{days}"])
                                                   
                                                  
      new_arrays.update({f"{dataset}_{days}": new_array})
  return new_arrays

def take_minimum_gru_cnn(dictionary):

  new_arrays = {}
  for dataset in ['IXIC', 'DJI', 'SPX']:
    for days in ['1_day', '5_days', '10_days', '1_month']:
      new_array = np.minimum(dictionary[f"{dataset}_cnn_{days}"], 
                            dictionary[f"{dataset}_gru_{days}"])
                                                  
      new_arrays.update({f"{dataset}_{days}": new_array})
  return new_arrays

def take_avg(dictionary):

  new_arrays = {}
  for dataset in ['IXIC', 'DJI', 'SPX']:
    for days in ['1_day', '5_days', '10_days', '1_month']:
      new_array = (dictionary[f"{dataset}_lstm_{days}"] +
                  dictionary[f"{dataset}_gru_{days}"] +
                  dictionary[f"{dataset}_cnn_{days}"] +
                  dictionary[f"{dataset}_xgb_{days}"]) / 4
      new_arrays.update({f"{dataset}_{days}": new_array})
  return new_arrays

def take_avg_from_gru_cnn(dictionary):

  new_arrays = {}
  for dataset in ['IXIC', 'DJI', 'SPX']:
    for days in ['1_day', '5_days', '10_days', '1_month']:
      new_array = (dictionary[f"{dataset}_gru_{days}"] +
                  dictionary[f"{dataset}_cnn_{days}"]) / 2
      new_arrays.update({f"{dataset}_{days}": new_array})
  return new_arrays

In [302]:
def calc_ensemble_err_rmse(dictionary, dictionary_scalers):
  ensemble_arr = {}

  friendly_max_arr = []
  friendly_min_arr = []
  friendly_avg_arr = []

  friendly_max_gru_cnn_arr = []
  friendly_min_gru_cnn_arr = []
  friendly_avg_gru_cnn_arr = []

  foo_avg_arr = take_avg(dictionary)
  foo_min_arr = take_minimum(dictionary)
  foo_max_arr = take_maximum(dictionary)

  foo_min_gru_cnn_arr = take_minimum_gru_cnn(dictionary)
  foo_max_gru_cnn_arr = take_maximum_gru_cnn(dictionary)
  foo_avg_gru_cnn_arr = take_avg_from_gru_cnn(dictionary)


  for dataset in ['IXIC', 'DJI', 'SPX']:
    foo_list_max = f"{dataset} "
    foo_list_min = f"{dataset} "
    foo_list_avg = f"{dataset} "

    foo_list_max_gru_cnn = f"{dataset} "
    foo_list_min_gru_cnn = f"{dataset} "
    foo_list_avg_gru_cnn = f"{dataset} "

    for days in ['1_day', '5_days', '10_days', '1_month']:
      
      #
      #
      ens_val_avg = my_rmse(dictionary_scalers[f"{dataset}_scaler"].inverse_transform(dictionary[f"{dataset}_valid_data_{days}"]), 
                            dictionary_scalers[f"{dataset}_scaler"].inverse_transform(foo_avg_arr[f"{dataset}_{days}"]))
      ensemble_arr.update({f"{dataset}_{days}_avg": ens_val_avg})
      #
      #
      ens_val_min = my_rmse(dictionary_scalers[f"{dataset}_scaler"].inverse_transform(dictionary[f"{dataset}_valid_data_{days}"]), 
                            dictionary_scalers[f"{dataset}_scaler"].inverse_transform(foo_min_arr[f"{dataset}_{days}"]))
      ensemble_arr.update({f"{dataset}_{days}_min": ens_val_min})
      #
      #
      ens_val_max = my_rmse(dictionary_scalers[f"{dataset}_scaler"].inverse_transform(dictionary[f"{dataset}_valid_data_{days}"]), 
                            dictionary_scalers[f"{dataset}_scaler"].inverse_transform(foo_max_arr[f"{dataset}_{days}"]))
      ensemble_arr.update({f"{dataset}_{days}_max": ens_val_max})
      #
      #
      ens_val_max_gru_cnn = my_rmse(dictionary_scalers[f"{dataset}_scaler"].inverse_transform(dictionary[f"{dataset}_valid_data_{days}"]), 
                            dictionary_scalers[f"{dataset}_scaler"].inverse_transform(foo_max_gru_cnn_arr[f"{dataset}_{days}"]))
      ensemble_arr.update({f"{dataset}_{days}_max": ens_val_max_gru_cnn})
      #
      #
      ens_val_min_gru_cnn = my_rmse(dictionary_scalers[f"{dataset}_scaler"].inverse_transform(dictionary[f"{dataset}_valid_data_{days}"]), 
                            dictionary_scalers[f"{dataset}_scaler"].inverse_transform(foo_min_gru_cnn_arr[f"{dataset}_{days}"]))
      ensemble_arr.update({f"{dataset}_{days}_max": ens_val_min_gru_cnn})
      #
      #
      ens_val_avg_gru_cnn = my_rmse(dictionary_scalers[f"{dataset}_scaler"].inverse_transform(dictionary[f"{dataset}_valid_data_{days}"]), 
                                  dictionary_scalers[f"{dataset}_scaler"].inverse_transform(foo_avg_gru_cnn_arr[f"{dataset}_{days}"]))
      ensemble_arr.update({f"{dataset}_{days}_avg_gru_cnn": ens_val_avg_gru_cnn})

      foo_list_max += f"{ens_val_max} & "
      foo_list_min += f"{ens_val_min} & "
      foo_list_avg += f"{ens_val_avg} & "
      foo_list_max_gru_cnn += f"{ens_val_max_gru_cnn} & "
      foo_list_min_gru_cnn += f"{ens_val_min_gru_cnn} & "
      foo_list_avg_gru_cnn += f"{ens_val_avg_gru_cnn} & "

   
    friendly_max_arr.append(foo_list_max)
    friendly_min_arr.append(foo_list_min)
    friendly_avg_arr.append(foo_list_avg)

    friendly_max_gru_cnn_arr.append(foo_list_max_gru_cnn)
    friendly_min_gru_cnn_arr.append(foo_list_min_gru_cnn)
    friendly_avg_gru_cnn_arr.append(foo_list_avg_gru_cnn)

  return ensemble_arr, friendly_max_arr, friendly_min_arr, friendly_avg_arr, friendly_max_gru_cnn_arr, friendly_min_gru_cnn_arr, friendly_avg_gru_cnn_arr

In [None]:
pred_foo_20d = 0
pred_foo_20d_scalers = 0

with open(f"{CFG.data_path}/20d/ensemble/prediction_ypred_yvalid.pkl", 'rb') as f:
    pred_foo_20d = pickle.load(f)

with open(f"{CFG.data_path}/20d/ensemble/local_scalers.pkl", 'rb') as f:
    pred_foo_20d_scalers = pickle.load(f)

In [307]:
_, _, _, _, friendly_max_gru_cnn_arr, friendly_min_gru_cnn_arr, _ = calc_ensemble_err_rmse(pred_foo_80d, pred_foo_80d_scalers)

In [308]:
friendly_max_gru_cnn_arr, friendly_min_gru_cnn_arr

(['IXIC 568.621 & 700.1288 & 755.758 & 1033.6339 & ',
  'DJI 996.5545 & 1016.1474 & 1252.8736 & 1886.8288 & ',
  'SPX 160.5248 & 202.4357 & 244.7957 & 232.3758 & '],
 ['IXIC 445.8942 & 632.354 & 661.6066 & 1110.9428 & ',
  'DJI 831.8319 & 1063.3606 & 1306.868 & 1623.5928 & ',
  'SPX 123.837 & 148.6444 & 162.0672 & 251.3738 & '])

In [277]:
pred_foo_50d = 0
pred_foo_50d_scalers = 0

with open(f"{CFG.data_path}/50d/ensemble/prediction_ypred_yvalid.pkl", 'rb') as f:
    pred_foo_50d = pickle.load(f)

with open(f"{CFG.data_path}/50d/ensemble/local_scalers.pkl", 'rb') as f:
    pred_foo_50d_scalers = pickle.load(f)

In [299]:
ens_50d_rmse, friendly_max_arr, friendly_min_arr, friendly_avg_arr, friendly_avg_gru_cnn_arr = calc_ensemble_err_rmse(pred_foo_50d, pred_foo_50d_scalers)

In [300]:
friendly_avg_gru_cnn_arr

['IXIC 519.4785 & 573.1805 & 722.204 & 930.8661 & ',
 'DJI 832.7307 & 980.3297 & 1189.3685 & 1606.6033 & ',
 'SPX 124.9443 & 146.3977 & 167.6845 & 232.7335 & ']

In [288]:
pred_foo_80d = 0
pred_foo_80d_scalers = 0

with open(f"{CFG.data_path}/80d/ensemble/prediction_ypred_yvalid.pkl", 'rb') as f:
    pred_foo_80d = pickle.load(f)

with open(f"{CFG.data_path}/80d/ensemble/local_scalers.pkl", 'rb') as f:
    pred_foo_80d_scalers = pickle.load(f)

In [293]:
ens_80d_rmse, friendly_max_arr, friendly_min_arr, friendly_avg_arr, friendly_avg_gru_cnn_arr  = calc_ensemble_err_rmse(pred_foo_80d, pred_foo_80d_scalers)

In [296]:
friendly_max_arr, friendly_min_arr, friendly_avg_arr, friendly_avg_gru_cnn_arr

(['IXIC 651.2994 & 897.883 & 973.3546 & 1300.4148 & ',
  'DJI 1131.2762 & 1313.6121 & 1527.0639 & 2341.2278 & ',
  'SPX 168.2734 & 230.325 & 267.429 & 288.4047 & '],
 ['IXIC 538.8819 & 652.9886 & 772.8444 & 1131.5247 & ',
  'DJI 1140.6272 & 1244.1807 & 1474.0222 & 2086.2189 & ',
  'SPX 163.1406 & 198.8585 & 218.8819 & 282.7971 & '],
 ['IXIC 487.5528 & 649.1597 & 746.0715 & 1045.5957 & ',
  'DJI 913.9161 & 1025.6347 & 1267.8063 & 1687.0171 & ',
  'SPX 133.0207 & 167.2679 & 188.0037 & 241.1517 & '],
 ['IXIC 473.3459 & 626.4082 & 687.9141 & 1001.0658 & ',
  'DJI 901.7551 & 987.7348 & 1205.5848 & 1615.8264 & ',
  'SPX 129.5351 & 166.5952 & 187.7399 & 232.1105 & '])

In [235]:
ens_20d_rmse = calc_ensemble_err_rmse()

In [241]:
with open(f"{CFG.data_path}/20d/ensemble/ens_rmse_errors.pkl", 'wb') as f:
  pickle.dump(ens_20d_rmse, f)

In [240]:
ens_20d_rmse

{'IXIC_1_day_avg': 483.5827,
 'IXIC_1_day_min': 586.0833,
 'IXIC_1_day_max': 553.4261,
 'IXIC_5_days_avg': 603.5157,
 'IXIC_5_days_min': 657.4704,
 'IXIC_5_days_max': 763.6896,
 'IXIC_10_days_avg': 705.8025,
 'IXIC_10_days_min': 786.3832,
 'IXIC_10_days_max': 863.656,
 'IXIC_1_month_avg': 955.7435,
 'IXIC_1_month_min': 948.1783,
 'IXIC_1_month_max': 1089.8476,
 'DJI_1_day_avg': 831.4623,
 'DJI_1_day_min': 969.9553,
 'DJI_1_day_max': 1021.2635,
 'DJI_5_days_avg': 1090.7445,
 'DJI_5_days_min': 1195.8833,
 'DJI_5_days_max': 1202.8723,
 'DJI_10_days_avg': 1269.4625,
 'DJI_10_days_min': 1499.7353,
 'DJI_10_days_max': 1401.337,
 'DJI_1_month_avg': 1717.6117,
 'DJI_1_month_min': 2178.683,
 'DJI_1_month_max': 1969.3633,
 'SPX_1_day_avg': 123.0113,
 'SPX_1_day_min': 155.6919,
 'SPX_1_day_max': 147.5459,
 'SPX_5_days_avg': 157.6765,
 'SPX_5_days_min': 201.2516,
 'SPX_5_days_max': 172.3794,
 'SPX_10_days_avg': 181.0714,
 'SPX_10_days_min': 218.5319,
 'SPX_10_days_max': 199.9963,
 'SPX_1_month_avg

In [139]:
prediction_dict3 = {}
prediction_dict3, local_scaler3 = ensemble_pipe(20, '20d', 'ensemble')

Restoring model weights from the end of the best epoch: 5.
Epoch 10: early stopping
IXIC_lstm_1_day DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_1_day DONE
Restoring model weights from the end of the best epoch: 6.
Epoch 11: early stopping
IXIC_cnn_1_day DONE




IXIC_xgb_1_day DONE
valid is [[0.92183832]
 [0.92268511]
 [0.92269733]
 [0.94267034]
 [0.94740009]
 [0.95657651]
 [0.96162712]
 [0.97686752]
 [0.9889794 ]
 [0.9919225 ]
 [0.99293587]
 [0.98392034]
 [0.95909713]
 [0.96677191]
 [0.98151357]
 [0.98084457]
 [0.99213613]
 [0.98721627]
 [0.99400373]
 [1.        ]
 [0.9809302 ]
 [0.97343892]
 [0.98003361]
 [0.94676693]
 [0.97416249]
 [0.95109782]
 [0.92441067]
 [0.93638528]
 [0.90854933]
 [0.92169159]
 [0.96513851]
 [0.97455391]
 [0.94918592]
 [0.95983945]
 [0.93939233]
 [0.92286667]
 [0.9537219 ]
 [0.91748391]
 [0.91647246]
 [0.89871439]
 [0.93260008]
 [0.94961116]
 [0.96198188]
 [0.98248265]
 [0.97405802]
 [0.97259966]
 [0.97027944]
 [0.9611915 ]
 [0.97886404]
 [0.95909805]
 [0.90993244]
 [0.90811564]
 [0.89447666]
 [0.89512866]
 [0.91494547]
 [0.91823285]
 [0.88233078]
 [0.89051082]
 [0.85411296]
 [0.83843409]
 [0.82091114]
 [0.78467793]
 [0.79278925]
 [0.76307254]
 [0.76333882]
 [0.74552423]
 [0.78483322]
 [0.82898956]
 [0.83897418]
 [0.8

In [141]:
prediction_dict3.keys()

dict_keys(['IXIC_lstm_1_day', 'IXIC_gru_1_day', 'IXIC_cnn_1_day', 'IXIC_xgb_1_day', 'IXIC_valid_data_1_day'])

In [70]:
prediction_list2 = []
prediction_list2 = ensemble_pipe(20, '20d', 'ensemble')
# prediction_list2

Restoring model weights from the end of the best epoch: 1.
Epoch 6: early stopping
IXIC_lstm_1_day DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
IXIC_gru_1_day DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
IXIC_cnn_1_day DONE




IXIC_xgb_1_day DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_lstm_5_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_5_days DONE
Restoring model weights from the end of the best epoch: 7.
Epoch 12: early stopping
IXIC_cnn_5_days DONE




IXIC_xgb_5_days DONE
Restoring model weights from the end of the best epoch: 4.
Epoch 9: early stopping
IXIC_lstm_10_days DONE
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping
IXIC_gru_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_cnn_10_days DONE




IXIC_xgb_10_days DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_lstm_1_month DONE
Restoring model weights from the end of the best epoch: 2.
Epoch 7: early stopping
IXIC_gru_1_month DONE
Restoring model weights from the end of the best epoch: 13.
Epoch 18: early stopping
IXIC_cnn_1_month DONE




IXIC_xgb_1_month DONE
