In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error

import random 

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM,GRU,SimpleRNN, RNN, Input, Bidirectional
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from sklearn.model_selection import GroupKFold

from tensorflow.keras.optimizers.schedules import ExponentialDecay

from sklearn.metrics import mean_squared_error as mse

from src.utils import *



In [41]:
class CFG:
    """Notebook-wide settings, read as plain class attributes."""

    # Seed handed to seed_everything().
    seed = 42

    # Base directory (relative path) that the CSV inputs are read from.
    data_path = "../data"

    # Default figure size, forwarded to matplotlib's 'figure.figsize'.
    img_dim1, img_dim2 = 20, 10

    # Epoch count and batch size passed to model4.fit.
    nepochs, bsize = 6, 16

    # Not referenced by any cell visible in this notebook.
    # NOTE(review): presumably leftovers from another experiment — confirm before removing.
    EPOCH, BATCH_SIZE = 300, 1024


# Default size for every figure drawn after this point, taken from CFG.
plt.rcParams['figure.figsize'] = (CFG.img_dim1, CFG.img_dim2)


def seed_everything(seed: int = 42) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and,
    when it can be imported, TensorFlow (which the Keras models use for
    weight initialisation and dropout).

    Args:
        seed: Integer seed applied to all generators.
    """
    import random  # local import keeps the function self-contained

    # Only affects child processes: the running interpreter fixed its hash
    # seed at start-up, so this does not make set/dict ordering deterministic.
    os.environ["PYTHONHASHSEED"] = str(seed)

    random.seed(seed)
    np.random.seed(seed)

    try:
        import tensorflow as tf
    except ImportError:
        # Nothing else to seed when TensorFlow is not installed.
        return
    tf.random.set_seed(seed)
    
# Apply the notebook-wide seed stored in CFG.
seed_everything(CFG.seed)

In [None]:
# Read the three per-index CSV files that live under CFG.data_path.
spy_ohlc_df, dow_jones_ohlc_df, nasdaq_ohlc_df = (
    pd.read_csv(f"{CFG.data_path}/{csv_name}")
    for csv_name in ('spy_ohlc_df.csv', 'dow_jones_ohlc_df.csv', 'nasdaq_ohlc_df.csv')
)

In [None]:
# Keep only the NASDAQ closing price, indexed by the parsed 'Date' column.
df = (
    nasdaq_ohlc_df[['Date', 'Close']]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)
df.head()

In [None]:
# Fit a min-max scaler on the closing prices and replace `df` with the scaled array.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/validation split below, so validation data influences the scaling.
# NOTE(review): `df` is overwritten, so running this cell twice refits the
# scaler on already-scaled values and breaks the later inverse_transform calls.
scaler = MinMaxScaler().fit(df)
df = scaler.transform(df)

In [None]:
# Window sizes handed to the src.utils helpers.
look_back, look_ahead = 50, 10

# Turn the scaled series into (input window, target window) pairs.
xdat, ydat = create_dataset(df, look_back=look_back, look_ahead=look_ahead)

# NOTE(review): presumably the first `cutoff` samples become the training set
# and the remainder the validation set — confirm in src.utils.prepare_split.
xtrain, ytrain, xvalid, yvalid = prepare_split(
    xdat,
    ydat,
    cutoff=5000,
    timesteps=look_back,
)

In [None]:
# Halt training once val_loss has gone `patience` epochs without improving by
# at least `min_delta`, then restore the weights of the best epoch.
# NOTE(review): min_delta is an absolute threshold on the loss; with MSE on
# min-max-scaled targets the per-epoch gain may be smaller than 0.001 —
# confirm this value is intended.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
def create_model4(out_dim):
    """Build and compile a two-layer GRU regressor.

    Args:
        out_dim: Number of units in the final Dense layer, i.e. how many
            values the model emits per input sequence.

    Returns:
        A compiled Sequential model (mean-squared-error loss, Adam optimizer)
        whose input is declared as (timesteps, 1) with a variable number of
        timesteps.
    """
    model = Sequential()
    # Declare the input once, up front. The `input_shape` layer argument is
    # deprecated in current Keras and was ignored on the second GRU anyway.
    model.add(Input(shape=(None, 1)))
    model.add(GRU(10, return_sequences=True))
    model.add(GRU(10))
    model.add(Dense(out_dim))

    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
    
# One output unit per forecast step; print the layer table for inspection.
model4 = create_model4(look_ahead)
model4.summary()

In [None]:
# Train on the training windows while monitoring the validation loss.
# NOTE(review): CFG is redefined further down the notebook without
# `nepochs`/`bsize`; this cell fails if that later definition is the active one.
model4.fit(
    x=xtrain,
    y=ytrain,
    batch_size=CFG.bsize,
    epochs=CFG.nepochs,
    callbacks=[early_stop],
    validation_data=(xvalid, yvalid),
)

# Forecast the validation windows and map the result back to the original scale.
y_pred4 = scaler.inverse_transform(model4.predict(xvalid))

In [None]:
# Error between forecast and inverse-scaled validation targets (my_rmse comes from src.utils).
print('RMSE:', my_rmse(y_pred4, scaler.inverse_transform(yvalid)))

In [None]:
# Overlay the inverse-scaled validation targets with the model's forecast.
ax = plt.gca()
ax.plot(scaler.inverse_transform(yvalid), label='real')
ax.plot(y_pred4, label='predicted')
ax.set_ylabel('')
ax.legend()
plt.show()

In [6]:
import yfinance as yf
from yahooquery import Ticker
from yahooquery import Screener

from tensorflow.keras.layers import Dense, LSTM,GRU,SimpleRNN, RNN, Input, Bidirectional, Dropout

In [7]:
class CFG:
    """Notebook-wide settings, read as plain class attributes.

    NOTE(review): this shadows the CFG defined earlier in the notebook and
    omits its `nepochs`, `bsize`, `EPOCH` and `BATCH_SIZE` attributes.
    """

    # Seed handed to seed_everything().
    seed = 42

    # Base directory (relative path) for inputs and saved artefacts.
    data_path = "../data"

    # Default figure size, forwarded to matplotlib's 'figure.figsize'.
    img_dim1, img_dim2 = 20, 10
   

# Default size for every figure drawn after this point, taken from CFG.
plt.rcParams['figure.figsize'] = (CFG.img_dim1, CFG.img_dim2)


def seed_everything(seed: int = 42) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and,
    when it can be imported, TensorFlow (which the Keras models use for
    weight initialisation and dropout).

    Args:
        seed: Integer seed applied to all generators.
    """
    import random  # local import keeps the function self-contained

    # Only affects child processes: the running interpreter fixed its hash
    # seed at start-up, so this does not make set/dict ordering deterministic.
    os.environ["PYTHONHASHSEED"] = str(seed)

    random.seed(seed)
    np.random.seed(seed)

    try:
        import tensorflow as tf
    except ImportError:
        # Nothing else to seed when TensorFlow is not installed.
        return
    tf.random.set_seed(seed)
    
# Apply the notebook-wide seed stored in CFG.
seed_everything(CFG.seed)

In [8]:
# Empty results table: one row per (dataset, metric) pair, one column per
# forecast horizon. Every cell starts out as NaN.
gru_predictions_n_errors = pd.DataFrame(
    index=pd.MultiIndex.from_product(
        [['IXIC', 'DJI', 'SPX'], ['RMSE', 'MAE', 'MAPE']],
        names=['Dataset', 'Metric'],
    ),
    columns=['1_day', '5_days', '10_days', '1_month', '2_months', '6_months', '1_year'],
)

In [9]:
# Fetch price history for the three indices over one shared date window.
download_window = {'start': '2017-01-01', 'end': '2023-01-01'}

nq = yf.download(tickers="^IXIC", **download_window)

dji = yf.download(tickers="^DJI", **download_window)

spx = yf.download(tickers="^SPX", **download_window)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [10]:
def _close_series(frame, label):
    """Return the 'Close' prices of a downloaded frame as a Series named `label`."""
    close = frame['Close']
    # With ticker-level MultiIndex columns, 'Close' selects a one-column frame
    # rather than a Series; reduce it so the merge below sees 1-D data.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    return close.rename(label)


# Align the three series on their date index instead of stacking raw arrays
# positionally. If the downloads ever differ in trading days, positional
# stacking either raises or silently pairs prices from different dates.
# The inner join keeps only dates present in all three.
_closes = pd.concat(
    [_close_series(nq, 'IXIC'), _close_series(spx, 'SPX'), _close_series(dji, 'DJI')],
    axis=1,
    join='inner',
)

merged_df_index = _closes.index

merged_df_dict = {
                  'Date': np.array(merged_df_index),
                  'IXIC': _closes['IXIC'].to_numpy(),
                  'SPX': _closes['SPX'].to_numpy(),
                  'DJI': _closes['DJI'].to_numpy()
                  }

# Same layout as before: dates as both the index and a 'Date' column, followed
# by one closing-price column per index.
merged_df = pd.DataFrame(data=merged_df_dict, index=merged_df_index, columns=['Date', 'IXIC', 'SPX', 'DJI'])

In [11]:
def create_gru_for_ts(output_dim=1):
    """Build and compile a four-layer stacked GRU regressor.

    The stack is GRU(64) -> GRU(128) -> GRU(64) -> GRU(32) with 20% dropout
    after each of the first three recurrent layers, followed by a Dense head.

    Args:
        output_dim: Number of units in the final Dense layer, i.e. how many
            values the model emits per input sequence.

    Returns:
        A compiled Sequential model (mean-squared-error loss, Adam optimizer)
        whose input is declared as (timesteps, 1) with a variable number of
        timesteps.
    """
    model = Sequential()
    # Declare the input explicitly; passing `input_shape` to a layer is
    # deprecated in current Keras.
    model.add(Input(shape=(None, 1)))
    model.add(GRU(64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(GRU(128, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(GRU(64, return_sequences=True))
    model.add(Dropout(0.2))
    # The last recurrent layer returns only its final state for the Dense head.
    model.add(GRU(32))
    model.add(Dense(output_dim))

    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


        

In [12]:
# Halt training once val_loss has gone `patience` epochs without improving by
# at least `min_delta`, then restore the weights of the best epoch.
# NOTE(review): the saved run logs show several fits stopping at epoch 6, i.e.
# no improvement of >= 0.001 after the first epoch — confirm that min_delta is
# not too coarse for MSE on min-max-scaled data.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

In [19]:
def gru_pipe(lookback_horizon, save_dir, model_str, gru_predictions_n_errors,
             epochs=20, batch_size=32):
    """Train, plot and score one GRU per (dataset, forecast horizon) pair.

    For every dataset named in level 0 of ``gru_predictions_n_errors.index``
    the matching column of the global ``merged_df`` is min-max scaled,
    windowed and split 80/20. A fresh ``create_gru_for_ts`` model is fitted
    for each forecast horizon (1, 5, 10 and 30 steps). For each fit, a
    true-vs-forecast plot is saved and ``assign_values`` records the result
    in the table, which is written to CSV once at the end.

    Relies on notebook globals: ``merged_df``, ``early_stop``, ``CFG`` and
    the ``src.utils`` helpers ``create_dataset``, ``prepare_split`` and
    ``assign_values``.

    Args:
        lookback_horizon: Number of past steps fed to the model per sample.
        save_dir: Sub-directory of ``CFG.data_path`` for this run's artefacts.
        model_str: Model label; used as a nested directory and file prefix.
        gru_predictions_n_errors: Results table indexed by (Dataset, Metric)
            with one column per horizon label.
        epochs: Maximum number of training epochs per model.
        batch_size: Mini-batch size used for training.

    Returns:
        None. Plots and the error table are written to
        ``<CFG.data_path>/<save_dir>/<model_str>/``.
    """
    horizons = [(1, '1_day'), (5, '5_days'), (10, '10_days'), (30, '1_month')]

    # Create the output directory up front; savefig/to_csv do not create it.
    out_dir = f"{CFG.data_path}/{save_dir}/{model_str}"
    os.makedirs(out_dir, exist_ok=True)

    # Index.unique() keeps first-appearance order. Iterating a set() of
    # strings changes order between interpreter runs, which also changes the
    # order in which models consume the random stream.
    datasets = gru_predictions_n_errors.index.get_level_values(0).unique()

    for dataset in datasets:
        series = merged_df[['Date', dataset]].copy()
        series['Date'] = pd.to_datetime(series['Date'])
        series = series.set_index('Date')

        # NOTE(review): the scaler is fitted on the whole series, so the
        # validation tail influences the scaling applied to the training data.
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(series)

        for look_ahead, horizon_string in horizons:
            xdat, ydat = create_dataset(
                scaled,
                look_back=lookback_horizon,
                look_ahead=look_ahead,
            )

            # [batch size, time steps, dimensionality]
            xtrain, ytrain, xvalid, yvalid = prepare_split(
                xdat,
                ydat,
                cutoff=int(len(xdat) * 0.8),
                timesteps=lookback_horizon,
            )
            print(xtrain.shape)

            model = create_gru_for_ts(look_ahead)
            model.fit(
                xtrain,
                ytrain,
                validation_data=(xvalid, yvalid),
                epochs=epochs,
                batch_size=batch_size,
                callbacks=[early_stop],
            )

            # Bring forecast and targets back to the original price scale.
            y_pred = scaler.inverse_transform(model.predict(xvalid))
            y_true = scaler.inverse_transform(yvalid)

            # Plot only the first step of every window. An explicit figure
            # that is closed after saving avoids leaking an empty pyplot
            # figure into the cell output.
            fig, ax = plt.subplots()
            ax.plot([item[0] for item in y_true], label="True")
            ax.plot([item[0] for item in y_pred], label="Forecast")
            ax.legend()
            fig.savefig(
                f"{out_dir}/{model_str}_{dataset}_{horizon_string}.jpg",
                bbox_inches='tight', dpi=120
            )
            plt.close(fig)

            # NOTE(review): presumably fills the RMSE/MAE/MAPE rows of this
            # dataset in the `horizon_string` column — confirm in src.utils.
            gru_predictions_n_errors = assign_values(gru_predictions_n_errors,
                                                     dataset,
                                                     y_pred,
                                                     y_true,
                                                     horizon_string)

    gru_predictions_n_errors.to_csv(f"{out_dir}/{model_str}_{save_dir}_window_errors.csv")
# y_pred5 = model5.predict(xvalid)
# y_pred5 = scaler.inverse_transform(y_pred5)

In [26]:
# 20-step look-back run; plots and the error CSV go under <CFG.data_path>/20d/gru_new/.
gru_pipe(20, '20d', 'gru_new', gru_predictions_n_errors)

(1191, 20, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping
(1188, 20, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping
(1184, 20, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 10: early stopping
(1168, 20, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping
(1191, 20, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
(1188, 20, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping
(1184, 20, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
(1168, 20, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping
(1191, 20, 1

<Figure size 1440x720 with 0 Axes>

In [27]:
# 50-step look-back run; plots and the error CSV go under <CFG.data_path>/50d/gru_new/.
gru_pipe(50, '50d', 'gru_new', gru_predictions_n_errors)

(1167, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping
(1164, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping
(1160, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping
(1144, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping
(1167, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping
(1164, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping
(1160, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
(1144, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping
(1167, 50, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: ea

<Figure size 1440x720 with 0 Axes>

In [28]:
# 80-step look-back run; plots and the error CSV go under <CFG.data_path>/80d/gru_new/.
gru_pipe(80, '80d', 'gru_new', gru_predictions_n_errors)

(1143, 80, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping
(1140, 80, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
(1136, 80, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
(1120, 80, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: early stopping
(1143, 80, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
(1140, 80, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
(1136, 80, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
(1120, 80, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7:

<Figure size 1440x720 with 0 Axes>