In [4]:
# STEP 1: Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Layer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K
from datetime import datetime, timedelta

In [5]:
# STEP 2: Load and Preprocess Data
data = pd.read_csv('NewDataSet.csv')

In [6]:
# Combine date and time into one datetime column
data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])
data = data[data['datetime'].dt.weekday < 5]  # Filter weekdays only
data.sort_values('datetime', inplace=True)
data.reset_index(drop=True, inplace=True)

In [7]:
# Drop date and time columns after combining
data.drop(columns=['date', 'time'], inplace=True)

In [8]:
# Select features
features = ['open', 'high', 'low', 'close', 'tick_volume']
data_values = data[features].values

In [9]:
# Normalize data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data_values)

In [10]:
# STEP 3: Create Sequences (e.g., past 60 steps to predict next step)
SEQUENCE_LENGTH = 60
X, y, timestamps = [], [], []
for i in range(len(data_scaled) - SEQUENCE_LENGTH):
    X.append(data_scaled[i:i+SEQUENCE_LENGTH])
    y.append(data_scaled[i+SEQUENCE_LENGTH])
    timestamps.append(data['datetime'].iloc[i + SEQUENCE_LENGTH])

X = np.array(X)
y = np.array(y)
timestamps = np.array(timestamps)

In [11]:
# STEP 4: Train/Test Split (time-based)
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
timestamps_test = timestamps[split:]

In [12]:
# STEP 5: Define Attention Layer
class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def call(self, inputs):
        scores = K.softmax(K.sum(inputs, axis=-1, keepdims=True), axis=1)
        context = inputs * scores
        return K.sum(context, axis=1)

In [13]:
# STEP 6: Build LSTM + Attention Model
input_shape = (X_train.shape[1], X_train.shape[2])
inputs = Input(shape=input_shape)

x = LSTM(128, return_sequences=True)(inputs)
x = Dropout(0.2)(x)
x = LSTM(64, return_sequences=True)(x)
x = Dropout(0.2)(x)

x = Attention()(x)
x = Dense(64, activation='relu')(x)
outputs = Dense(5)(x)  # Predict open, high, low, close, volume

model = Model(inputs, outputs)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model.summary()




In [14]:
# STEP 7: Train Model
callbacks = [EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)]
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=50,
    batch_size=64,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 89ms/step - loss: 0.0020 - val_loss: 3.0090e-04
Epoch 2/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 79ms/step - loss: 7.5117e-04 - val_loss: 2.1754e-04
Epoch 3/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 77ms/step - loss: 6.0971e-04 - val_loss: 1.3341e-04
Epoch 4/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 80ms/step - loss: 5.0855e-04 - val_loss: 1.5540e-04
Epoch 5/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 84ms/step - loss: 4.3936e-04 - val_loss: 1.4489e-04
Epoch 6/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 93ms/step - loss: 3.6259e-04 - val_loss: 6.7695e-05
Epoch 7/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 95ms/step - loss: 3.3182e-04 - val_loss: 6.4593e-05
Epoch 8/50
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 108ms/step - lo

In [15]:
# STEP 8: Evaluate Model
loss = model.evaluate(X_test, y_test)
print("Test MSE Loss:", loss)

[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - loss: 1.9045e-04
Test MSE Loss: 0.00042429313180036843


In [16]:
# STEP 9: Make Future Prediction Function Based on Date and Time
def predict_for_datetime(model, data_scaled, original_data, scaler, sequence_length, target_datetime):
    target_datetime = pd.to_datetime(target_datetime)
    if target_datetime not in original_data['datetime'].values:
        raise ValueError("Date and time not found in historical data.")

    idx = original_data.index[original_data['datetime'] == target_datetime][0]
    if idx < sequence_length:
        raise ValueError("Not enough history before the given date and time.")

    input_seq = data_scaled[idx - sequence_length:idx].reshape(1, sequence_length, data_scaled.shape[1])
    pred_scaled = model.predict(input_seq)
    pred_actual = scaler.inverse_transform(pred_scaled)
    return pred_actual[0]  # Return open, high, low, close, tick_volume

In [17]:
# STEP 10: Predict and Display
user_datetime = input("Enter the datetime for prediction (YYYY-MM-DD HH:MM:SS): ")
prediction = predict_for_datetime(model, data_scaled, data, scaler, SEQUENCE_LENGTH, user_datetime)
print("Predicted Values for {} (open, high, low, close, tick_volume):".format(user_datetime))
print(prediction)

ValueError: Date and time not found in historical data.

In [None]:
# OPTIONAL: Plot training loss
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()