In [1]:
# STEP 1: Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Layer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K
from datetime import datetime, timedelta

In [2]:
# STEP 2: Load and Preprocess Data
data = pd.read_csv('NewDataSet.csv')

In [3]:
# Combine date and time into one datetime column
data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])
data = data[data['datetime'].dt.weekday < 5]  # Filter weekdays only
data.sort_values('datetime', inplace=True)
data.reset_index(drop=True, inplace=True)

In [4]:
# Drop date and time columns after combining
data.drop(columns=['date', 'time'], inplace=True)

# Select features
features = ['open', 'high', 'low', 'close', 'tick_volume']
data_values = data[features].values

# Normalize data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data_values)

In [5]:
# STEP 3: Create Sequences (e.g., past 60 steps to predict next step)
SEQUENCE_LENGTH = 60
X, y = [], []
for i in range(len(data_scaled) - SEQUENCE_LENGTH):
    X.append(data_scaled[i:i+SEQUENCE_LENGTH])
    y.append(data_scaled[i+SEQUENCE_LENGTH])

X = np.array(X)
y = np.array(y)

# STEP 4: Train/Test Split (time-based)
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [6]:
# STEP 5: Define Attention Layer
class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def call(self, inputs):
        scores = K.softmax(K.sum(inputs, axis=-1, keepdims=True), axis=1)
        context = inputs * scores
        return K.sum(context, axis=1)

In [7]:
# STEP 6: Build LSTM + Attention Model
input_shape = (X_train.shape[1], X_train.shape[2])
inputs = Input(shape=input_shape)

x = LSTM(128, return_sequences=True)(inputs)
x = Dropout(0.2)(x)
x = LSTM(64, return_sequences=True)(x)
x = Dropout(0.2)(x)

x = Attention()(x)
x = Dense(64, activation='relu')(x)
outputs = Dense(5)(x)  # Predict open, high, low, close, volume

model = Model(inputs, outputs)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model.summary()




In [8]:
# STEP 7: Train Model
callbacks = [EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)]
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=50,
    batch_size=64,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 86ms/step - loss: 0.0021 - val_loss: 2.8393e-04
Epoch 2/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 95ms/step - loss: 7.8699e-04 - val_loss: 2.1454e-04
Epoch 3/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 75ms/step - loss: 6.2671e-04 - val_loss: 1.3135e-04
Epoch 4/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 73ms/step - loss: 4.9964e-04 - val_loss: 1.2850e-04
Epoch 5/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 76ms/step - loss: 4.5181e-04 - val_loss: 7.7583e-05
Epoch 6/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 81ms/step - loss: 3.3672e-04 - val_loss: 9.5489e-05
Epoch 7/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 76ms/step - loss: 3.0823e-04 - val_loss: 7.3958e-05
Epoch 8/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 76ms/step - loss

In [9]:
# STEP 8: Evaluate Model
loss = model.evaluate(X_test, y_test)
print("Test MSE Loss:", loss)

[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 29ms/step - loss: 1.5233e-04
Test MSE Loss: 0.00028687555459327996


In [13]:
# STEP 9: Recursive Future Forecast Function
def recursive_forecast(model, data_scaled, scaler, sequence_length, future_steps):
    input_seq = data_scaled[-sequence_length:].reshape(1, sequence_length, data_scaled.shape[1])
    predictions_scaled = []

    for _ in range(future_steps):
        pred = model.predict(input_seq)
        predictions_scaled.append(pred[0])
        # Update input sequence
        input_seq = np.append(input_seq[:, 1:, :], [[pred[0]]], axis=1)

    predictions_scaled = np.array(predictions_scaled)
    predictions_actual = scaler.inverse_transform(predictions_scaled)
    return predictions_actual

In [14]:
# STEP 10: Predict Future and Plot
future_steps = int(input("Enter number of future 30-min steps to predict: "))
predictions = recursive_forecast(model, data_scaled, scaler, SEQUENCE_LENGTH, future_steps)

# Generate future timestamps
last_datetime = data['datetime'].iloc[-1]
future_datetimes = [last_datetime + timedelta(minutes=30 * i) for i in range(1, future_steps + 1)]


ValueError: invalid literal for int() with base 10: ''

In [None]:
# Plot predictions
pred_df = pd.DataFrame(predictions, columns=features)
pred_df['datetime'] = future_datetimes

# Plot each feature
for feature in features:
    plt.figure(figsize=(10, 4))
    plt.plot(data['datetime'].iloc[-100:], data[feature].iloc[-100:], label='Actual')
    plt.plot(pred_df['datetime'], pred_df[feature], label='Forecast')
    plt.title(f"{feature.capitalize()} Prediction")
    plt.xlabel("Time")
    plt.ylabel(feature)
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# OPTIONAL: Plot training loss
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()