In [1]:
import pandas as pd

# Read the raw quotes and normalise the OHLCV column names to lower case.
column_names = ('Date', 'Open', 'High', 'Low', 'Close', 'Volume')
data = pd.read_csv('data/^NDX_raw_data.csv').rename(
    columns={name: name.lower() for name in column_names}
)

# Rows from position 3524 onwards are set aside in `data_backup`; only the
# first 3524 rows are processed by the cells below.
data, data_backup = data.iloc[:3524], data.iloc[3524:]

# Independent copy of the working slice.
data_copy = data.copy()

print('Data imported and copied.', flush=True)

Data imported and copied.


In [2]:
import numpy as np

## Creating sequences
def create_dataset(dataset, time_step=1, output_step=1):
    """Cut column 0 of ``dataset`` into sliding (input, target) windows.

    Parameters
    ----------
    dataset : 2-D array indexed as ``dataset[row, column]``; only column 0
        is read.
    time_step : number of consecutive rows in each input window.
    output_step : number of rows immediately after the input window that
        form the matching target.

    Returns
    -------
    (dataX, dataY) : tuple of ``np.ndarray``. Row ``k`` of ``dataX`` is
        ``dataset[k:k + time_step, 0]`` and row ``k`` of ``dataY`` is the
        ``output_step`` values that follow it. Both arrays are empty when
        the dataset is too short to hold a single complete window.
    """
    dataX, dataY = [], []
    # A window starting at `start` needs rows up to start + time_step +
    # output_step - 1, so the last valid start is len - time_step -
    # output_step (inclusive). Hence the "+ 1": without it the most recent
    # complete window is silently dropped.
    n_windows = len(dataset) - time_step - output_step + 1
    for start in range(n_windows):
        split = start + time_step
        dataX.append(dataset[start:split, 0])
        dataY.append(dataset[split:split + output_step, 0])

    return np.array(dataX), np.array(dataY)

# Window geometry and model sizes shared by the cells below.
period = 60        # look-back length, passed to create_dataset as time_step
output_step = 7    # forecast horizon: target length and size of each model's output
trend_period = 14  # number of leading samples cropped from each seasonal component
rsi_period = 14    # number of leading samples cropped from each RSI series
num_features = 3   # last dimension of the models' input_shape
# First dimension of the models' input_shape: 60 - 14 = 46.
input_period = period - trend_period
units = 512        # not referenced in the cells visible here

In [3]:
# Slice the closing prices into `period`-long input windows (X) and the
# `output_step` values that follow each of them (y).
X, y = create_dataset(
    data[['close']].to_numpy(),
    time_step=period,
    output_step=output_step,
)

In [4]:
# Glue every input window to its target so that each row holds one complete
# sequence (input values first, target values last).
data_input = np.array([
    np.concatenate(pair, axis=0)
    for pair in zip(X, y)
])

In [5]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive seasonal decomposition of every window. The cycle length comes
# from `trend_period` instead of a repeated literal 14, because the cropping
# cell further down removes `trend_period` samples from each seasonal
# component; the two values have to stay in sync.
decompositions = np.array([
    seasonal_decompose(window, model='additive', period=trend_period)
    for window in data_input
])

# Per-window trend and seasonal components. The trend contains NaN entries,
# which are removed in a later cell.
trends = np.array([result.trend for result in decompositions])
seasons = np.array([result.seasonal for result in decompositions])

In [6]:
from talib import RSI

# Relative Strength Index of every window. The look-back is passed
# explicitly: the cropping cell below drops the first `rsi_period` values of
# each series, so the indicator must not silently depend on TA-Lib's default
# (which happens to be 14 as well).
rsi = np.array([RSI(window, timeperiod=rsi_period) for window in data_input])

In [7]:
# Keep only the finite part of each trend (NaN entries are discarded).
trends_dropna = np.array([
    window_trend[~np.isnan(window_trend)] for window_trend in trends
])

# Drop the first `trend_period` samples of each seasonal component.
seasons_cropped = np.array([
    window_season[trend_period:] for window_season in seasons
])

# Drop the first `rsi_period` samples of each RSI series.
rsi_dropna = np.array([
    window_rsi[rsi_period:] for window_rsi in rsi
])

In [8]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# One independent scaler per window and per feature; the fitted scalers stay
# available in `scaler`, `scaler_seasonal` and `scaler_rsi`.
scaler = [StandardScaler() for _ in range(trends_dropna.shape[0])]
scaler_seasonal = [StandardScaler() for _ in range(seasons_cropped.shape[0])]
scaler_rsi = [MinMaxScaler() for _ in range(rsi_dropna.shape[0])]

# Each series is reshaped into a single column, fitted and transformed by
# its own scaler.
trends_scaled = np.array([
    fitted.fit_transform(series.reshape(-1, 1))
    for fitted, series in zip(scaler, trends_dropna)
])
seasons_scaled = np.array([
    fitted.fit_transform(series.reshape(-1, 1))
    for fitted, series in zip(scaler_seasonal, seasons_cropped)
])
rsi_scaled = np.array([
    fitted.fit_transform(series.reshape(-1, 1))
    for fitted, series in zip(scaler_rsi, rsi_dropna)
])

In [9]:
# Assemble the model inputs and targets from the scaled per-window series.
X_input = list()
y_input = list()

# The loop variables deliberately do NOT reuse the names `trend`, `season`
# or `rsi`: a loop variable called `rsi` would overwrite the RSI array built
# in an earlier cell and break any re-run of the cells that read it.
for trend_win, season_win, rsi_win in zip(trends_scaled, seasons_scaled, rsi_scaled):
    # Everything except the last `output_step` rows of each feature becomes
    # the input; the three single-column features are stacked side by side.
    X_input.append(np.hstack((
        trend_win[:trend_win.shape[0] - output_step],
        season_win[:season_win.shape[0] - output_step],
        rsi_win[:rsi_win.shape[0] - output_step],
    )))
    # The last `output_step` rows of the scaled trend are the target.
    y_input.append(trend_win[trend_win.shape[0] - output_step:])

X_input = np.array(X_input)
y_input = np.array(y_input)

## BiLSTM channel

In [10]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Channel 1: bidirectional LSTM followed by dropout and a linear read-out
# with one unit per forecast step.
# NOTE(review): this model emits (batch, output_step), whereas the sibling
# channels finish with Reshape((output_step, 1)) - confirm that downstream
# code expects the 2-D shape here.
model_channel_1 = Sequential([
    Bidirectional(
        LSTM(units=192*2, activation='tanh', recurrent_activation='sigmoid'),
        input_shape=(input_period, num_features),
    ),
    Dropout(0.2),
    Dense(output_step),
])

# Compile the model
model_channel_1.compile(optimizer='adam', loss='mean_squared_error')

# Weights-only checkpoint. With save_best_only=True a file is written only
# for epochs in which the monitored metric improves, not after every epoch.
checkpoint_channel_1 = ModelCheckpoint(
    filepath='model_weights_6_3_channel_1/model_weights_epoch_{epoch:02d}.h5',
    save_best_only=True,
    save_weights_only=True,
)

# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stopping_channel_1 = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Every channel cell rebinds the generic names below, so after running the
# whole notebook they refer to the last channel only. Prefer the
# channel-specific names above; the aliases are kept for existing code.
checkpoint = checkpoint_channel_1
early_stopping = early_stopping_channel_1

2024-05-27 18:07:53.503061: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-27 18:07:53.571269: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-27 18:07:53.571336: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-27 18:07:53.574815: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-27 18:07:53.598295: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


## CNN channel

In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, AveragePooling1D, Flatten, Dense, Dropout, Reshape

# Channel 2: pointwise 1-D convolution, average pooling and two dense
# layers, reshaped to (output_step, 1).
# The first layer declares its input shape like every other channel does, so
# the model is built immediately instead of on its first call.
model_channel_2 = Sequential([
    Conv1D(filters=64, kernel_size=1, input_shape=(input_period, num_features)),
    AveragePooling1D(),
    Flatten(),
    Dense(units=192),
    Dense(units=output_step),
    Reshape((output_step,1))
])

# Compile the model
model_channel_2.compile(optimizer='adam', loss='mean_squared_error')

# Weights-only checkpoint. With save_best_only=True a file is written only
# for epochs in which the monitored metric improves, not after every epoch.
checkpoint_channel_2 = ModelCheckpoint(
    filepath='model_weights_6_3_channel_2/model_weights_epoch_{epoch:02d}.h5',
    save_best_only=True,
    save_weights_only=True,
)

# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stopping_channel_2 = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Every channel cell rebinds the generic names below, so after running the
# whole notebook they refer to the last channel only. Prefer the
# channel-specific names above; the aliases are kept for existing code.
checkpoint = checkpoint_channel_2
early_stopping = early_stopping_channel_2

## RNN channel

In [12]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Flatten
from tensorflow.keras.initializers import Zeros

# Channel 3: simple RNN, dropout and a bias-free, zero-initialised linear
# read-out reshaped to (output_step, 1).
model_channel_3 = Sequential([
    SimpleRNN(units=128, activation='tanh', input_shape=(input_period, num_features)),
    Dropout(0.1),
    Flatten(),
    Dense(units=output_step, kernel_initializer=Zeros(), use_bias=False),
    Reshape((output_step,1))
])

# Compile the model
model_channel_3.compile(optimizer='adam', loss='mean_squared_error')

# Weights-only checkpoint. With save_best_only=True a file is written only
# for epochs in which the monitored metric improves, not after every epoch.
checkpoint_channel_3 = ModelCheckpoint(
    filepath='model_weights_6_3_channel_3/model_weights_epoch_{epoch:02d}.h5',
    save_best_only=True,
    save_weights_only=True,
)

# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stopping_channel_3 = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Every channel cell rebinds the generic names below, so after running the
# whole notebook they refer to the last channel only. Prefer the
# channel-specific names above; the aliases are kept for existing code.
checkpoint = checkpoint_channel_3
early_stopping = early_stopping_channel_3

## LSTM channel

In [13]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.initializers import Zeros

# Channel 4: single LSTM layer with a bias-free, zero-initialised linear
# read-out reshaped to (output_step, 1).
model_channel_4 = Sequential([
    LSTM(units=480, input_shape=(input_period, num_features), activation='tanh', recurrent_activation='sigmoid'),
    Dense(units=output_step, kernel_initializer=Zeros(), use_bias=False),
    Reshape((output_step,1))
])

# Compile the model
model_channel_4.compile(optimizer='adam', loss='mean_squared_error')

# Weights-only checkpoint. With save_best_only=True a file is written only
# for epochs in which the monitored metric improves, not after every epoch.
checkpoint_channel_4 = ModelCheckpoint(
    filepath='model_weights_6_3_channel_4/model_weights_epoch_{epoch:02d}.h5',
    save_best_only=True,
    save_weights_only=True,
)

# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stopping_channel_4 = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Every channel cell rebinds the generic names below, so after running the
# whole notebook they refer to the last channel only. Prefer the
# channel-specific names above; the aliases are kept for existing code.
checkpoint = checkpoint_channel_4
early_stopping = early_stopping_channel_4

## Stacked LSTM channel

In [14]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.initializers import Zeros

# Channel 5: two stacked LSTM layers that both return the full sequence; the
# sequence is flattened before the bias-free, zero-initialised read-out and
# reshaped to (output_step, 1).
model_channel_5 = Sequential([
    LSTM(units=512, return_sequences=True, input_shape=(input_period, num_features), activation='tanh', recurrent_activation='sigmoid'),
    Dropout(0.2),
    LSTM(units=512, return_sequences=True, activation='tanh', recurrent_activation='sigmoid'),
    Flatten(),
    Dense(units=output_step, kernel_initializer=Zeros(), use_bias=False),
    Reshape((output_step,1))
])

# Compile the model
model_channel_5.compile(optimizer='adam', loss='mean_squared_error')

# Weights-only checkpoint. With save_best_only=True a file is written only
# for epochs in which the monitored metric improves, not after every epoch.
checkpoint_channel_5 = ModelCheckpoint(
    filepath='model_weights_6_3_channel_5/model_weights_epoch_{epoch:02d}.h5',
    save_best_only=True,
    save_weights_only=True,
)

# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stopping_channel_5 = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Every channel cell rebinds the generic names below, so after running the
# whole notebook they refer to the last channel only. Prefer the
# channel-specific names above; the aliases are kept for existing code.
checkpoint = checkpoint_channel_5
early_stopping = early_stopping_channel_5

## Multi Layer Perceptron channel

In [15]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Channel 6: multi-layer perceptron.
# Dense acts on the last axis only, so with a (input_period, num_features)
# input the hidden layer yields one 512-vector per time step. Without the
# Flatten below the network would emit (batch, input_period, output_step),
# which cannot be compared with an output_step-long target. Flatten and
# Reshape bring the output to (batch, output_step, 1), the same shape the
# other Flatten-based channels produce.
# NOTE(review): confirm no downstream code relied on the former 3-D output.
model_channel_6 = Sequential([
    Dense(units=512, activation='tanh', input_shape=(input_period, num_features)),
    Dropout(0.1),
    Flatten(),
    Dense(output_step, activation='linear'),
    Reshape((output_step,1))
])

# Compile the model
model_channel_6.compile(optimizer='adam', loss='mean_squared_error')

# Weights-only checkpoint. With save_best_only=True a file is written only
# for epochs in which the monitored metric improves, not after every epoch.
checkpoint_channel_6 = ModelCheckpoint(
    filepath='model_weights_6_3_channel_6/model_weights_epoch_{epoch:02d}.h5',
    save_best_only=True,
    save_weights_only=True,
)

# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stopping_channel_6 = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Every channel cell rebinds the generic names below, so after running the
# whole notebook they refer to the last channel only. Prefer the
# channel-specific names above; the aliases are kept for existing code.
checkpoint = checkpoint_channel_6
early_stopping = early_stopping_channel_6

## GRU channel

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Flatten

# Channel 7: GRU returning the full sequence, dropout, then a flattened,
# bias-free, zero-initialised linear read-out reshaped to (output_step, 1).
model_channel_7 = Sequential([
    GRU(units=192, activation='tanh', return_sequences=True, input_shape=(input_period, num_features)),
    Dropout(0.4),
    Flatten(),
    Dense(units=output_step, kernel_initializer=Zeros(), use_bias=False),
    Reshape((output_step,1))
])

# Compile the model
model_channel_7.compile(optimizer='adam', loss='mean_squared_error')

# Weights-only checkpoint. With save_best_only=True a file is written only
# for epochs in which the monitored metric improves, not after every epoch.
checkpoint_channel_7 = ModelCheckpoint(
    filepath='model_weights_6_3_channel_7/model_weights_epoch_{epoch:02d}.h5',
    save_best_only=True,
    save_weights_only=True,
)

# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stopping_channel_7 = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Every channel cell rebinds the generic names below, so after running the
# whole notebook they refer to the last channel only. Prefer the
# channel-specific names above; the aliases are kept for existing code.
checkpoint = checkpoint_channel_7
early_stopping = early_stopping_channel_7