In [1]:
import pandas as pd
import numpy as np

In [2]:
data_path='training_set_filtered.csv'
data=pd.read_csv(data_path)

In [3]:
# Drop unnecessary columns
data =data [['ExecutionTime','high','low','close','volume']] 
data.set_index('ExecutionTime',drop=True,inplace=True) 
data.head()

Unnamed: 0_level_0,high,low,close,volume
ExecutionTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-07 20:15:00+01:00,57.28,55.2,57.28,1.125
2021-01-07 20:30:00+01:00,61.9,59.7,61.3,5.15
2021-01-07 20:45:00+01:00,61.9,59.7,61.3,0.0
2021-01-07 21:00:00+01:00,59.94,56.1,56.1,0.2
2021-01-07 21:15:00+01:00,63.75,61.0,63.47,2.525


In [4]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
data[data.columns] = scaler.fit_transform(data )

training_size = round(len(data ) * 0.80)

train_data = data[:training_size]
test_data  = data[training_size:]

In [5]:
train_data.shape, test_data.shape

((6521898, 4), (1630474, 4))

In [6]:
def create_sequence(dataset, n_steps_in=30, n_steps_out=10, gap=4):
    sequences = []
    labels = []
    start_idx = 0
    
    for stop_idx in range(n_steps_in, len(dataset) - n_steps_out - gap + 1, n_steps_in + gap + n_steps_out):
        # Append the input sequence (30 time steps)
        sequences.append(dataset.iloc[start_idx:stop_idx])
        
        # Append the label sequence (the next 10 time steps, starting after the gap)
        labels.append(dataset.iloc[stop_idx + gap:stop_idx + gap + n_steps_out].values)
        
        # Move the start_idx to the next position
        start_idx += n_steps_in + gap + n_steps_out

    return np.array(sequences), np.array(labels)

In [7]:
# Create sequences and labels
train_seq, train_label = create_sequence(train_data)
test_seq, test_label = create_sequence(test_data)

In [8]:
# Check the shapes
train_seq.shape, train_label.shape, test_seq.shape, test_label.shape

((148224, 30, 4), (148224, 10, 4), (37056, 30, 4), (37056, 10, 4))

In [9]:
import tensorflow as tf

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional,Reshape

In [11]:
# Define the model
model = Sequential()

# Add the first LSTM layer with 30 units, input_shape specifies the shape of each input sample
model.add(LSTM(units=30, return_sequences=True, input_shape=(30, 4)))

# Add a Dropout layer to prevent overfitting
model.add(Dropout(0.1))

# Add a second LSTM layer with 30 units
model.add(LSTM(units=30, return_sequences=False))

# Add another Dropout layer
model.add(Dropout(0.1))

# Output layer that predicts 10 future time steps, each with 4 features
model.add(Dense(10 * 4))  # Predicts 10 time steps, each with 4 features
model.add(Reshape((10, 4)))  # Reshapes the output to (10, 4)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Model summary to visualize the structure
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 30)            4200      
_________________________________________________________________
dropout (Dropout)            (None, 30, 30)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 30)                7320      
_________________________________________________________________
dropout_1 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense (Dense)                (None, 40)                1240      
_________________________________________________________________
reshape (Reshape)            (None, 10, 4)             0         
Total params: 12,760
Trainable params: 12,760
Non-trainable params: 0
____________________________________________________

In [12]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Define model checkpoint callback to save the best model
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

# Fit the model with early stopping and model checkpointing
history = model.fit(train_seq, train_label, 
                    epochs=80, 
                    validation_data=(test_seq, test_label), 
                    verbose=1, 
                    callbacks=[early_stopping, model_checkpoint])


Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80


In [13]:
model.save('best_model.h5')