In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import joblib
import matplotlib.pyplot as plt
from tensorflow.keras.utils import plot_model


In [2]:
!pip install pydot
!pip install graphviz



In [4]:
def create_sequences_multivariate(data, seq_len, pred_steps=1):
    """
    Build sliding-window samples for multivariate time series prediction.

    Parameters:
    - data: array-like shaped (n_samples, n_features); the LAST column is the
      prediction target (the callers in this notebook put `close` there)
    - seq_len: number of consecutive rows in each input window
    - pred_steps: number of future target values attached to each window

    Returns:
    - X: array shaped (n_windows, seq_len, n_features)
    - y: array shaped (n_windows, pred_steps) holding the last column of the
      `pred_steps` rows that immediately follow each window

    When `data` has fewer than `seq_len + pred_steps` rows no window fits; the
    result is then a pair of correctly shaped empty arrays (zero windows)
    rather than shapeless `(0,)` arrays, so downstream shape checks and
    slicing keep working.

    Raises:
    - ValueError: if `seq_len` or `pred_steps` is smaller than 1
    """
    if seq_len < 1 or pred_steps < 1:
        raise ValueError("seq_len and pred_steps must both be at least 1")

    data = np.asarray(data)
    n_windows = data.shape[0] - seq_len - pred_steps + 1

    if n_windows <= 0:
        # Not enough rows for even one (window, target) pair.
        empty_X = np.empty((0, seq_len) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0, pred_steps), dtype=data.dtype)
        return empty_X, empty_y

    # Input windows: rows [i, i + seq_len)
    X = np.stack([data[i:i + seq_len] for i in range(n_windows)])
    # Targets: last column of rows [i + seq_len, i + seq_len + pred_steps)
    y = np.stack([
        data[i + seq_len:i + seq_len + pred_steps, -1]
        for i in range(n_windows)
    ])
    return X, y

def build_improved_model(seq_len, n_features, pred_steps=4, symbol=None):
    """
    Build and compile the stacked bidirectional-LSTM forecasting model.

    Parameters:
    - seq_len: number of time steps in each input window
    - n_features: number of features per time step
    - pred_steps: number of future values the output layer emits
    - symbol: optional label used only in the architecture image file name.
      When omitted, a notebook-level global named `symbol` is used if one
      exists (this is what earlier versions of this function relied on);
      otherwise the image is saved as "model_architecture.png".

    Returns:
    - the compiled `tf.keras.Model` (Adam, lr=0.001, MSE loss)

    Side effects:
    - prints the model summary
    - tries to write an architecture diagram PNG via `plot_model`; an
      ImportError from that step is reported and does not abort the build
    """
    inputs = tf.keras.Input(shape=(seq_len, n_features))

    # Bidirectional LSTM stack; only the last LSTM collapses the time axis.
    x = inputs
    for units, return_sequences in ((128, True), (64, True), (32, False)):
        x = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(units, return_sequences=return_sequences)
        )(x)
        x = tf.keras.layers.Dropout(0.2)(x)

    # Dense head
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dense(32, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)

    # Output layer - one unit per predicted future step
    outputs = tf.keras.layers.Dense(pred_steps)(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae', 'mape', 'mse']
    )
    model.summary()

    # Resolve the label for the image file name without depending on hidden
    # notebook state: a missing global must not crash model construction.
    if symbol is None:
        symbol = globals().get("symbol")
    if symbol is None:
        img_path = "model_architecture.png"
    else:
        img_path = f"model_architecture_{symbol}.png"

    try:
        # Create the visualization
        plot_model(
            model,
            to_file=img_path,
            show_shapes=True,
            show_layer_names=True,
            rankdir='TB',  # TB: top to bottom, LR: left to right
            dpi=96,
            layer_range=None,
            expand_nested=False,
            show_layer_activations=True,
            show_trainable=True
        )
        print(f"✓ Model architecture saved as: {img_path}")

    except ImportError:
        # The diagram is optional; the text summary above has already been printed.
        print("❌ graphviz/pydot not available. Using alternative visualization...")

    return model

def predict_future(model, last_sequence, scaler, n_future, feature_means):
    """
    Roll the trained model forward to produce `n_future` close-price forecasts.

    Each iteration predicts from the current window, keeps only the FIRST step
    of the model's forecast horizon, appends a synthetic row carrying that
    value, and slides the window by one row.

    Parameters:
    - model: object with `predict(batch, verbose=0)` returning an array whose
      first row holds the forecast (one or more steps) for the single window
    - last_sequence: scaled window shaped (seq_len, n_features); the target
      (close) is the last column. It is not modified.
    - scaler: fitted scaler exposing `inverse_transform` on (1, n_features) rows
    - n_future: number of future steps to generate
    - feature_means: scaled per-feature values used to fill the non-target
      columns of the row passed to `inverse_transform`

    Returns:
    - 1-D numpy array of length `n_future` with close prices in original units
    """
    current_sequence = np.array(last_sequence, dtype=float)  # private copy
    inverse_template = np.array(feature_means, dtype=float)
    future_predictions = []

    for _ in range(n_future):
        # Add the batch axis expected by the model: (1, seq_len, n_features)
        batch = current_sequence[np.newaxis, ...]

        # The model may emit several future steps at once. Reduce the output to
        # one scalar (the next step); using the raw array here would produce a
        # ragged row below and break the assignment into `dummy_row`.
        prediction = np.asarray(model.predict(batch, verbose=0))
        next_close_scaled = float(prediction.reshape(-1)[0])

        # Synthetic next row: carry the last known non-target features forward
        # and put the predicted close in the last column.
        next_row = current_sequence[-1].copy()
        next_row[-1] = next_close_scaled

        # Slide the window: drop the oldest row, append the synthetic one
        current_sequence = np.vstack([current_sequence[1:], next_row])

        # Inverse transform the prediction: the scaler needs a full feature
        # row, so fill the other columns with the supplied means.
        dummy_row = inverse_template.copy()
        dummy_row[-1] = next_close_scaled
        next_close_actual = scaler.inverse_transform(dummy_row.reshape(1, -1))[0, -1]

        future_predictions.append(next_close_actual)

    return np.array(future_predictions)

# Load your trained model and data
def generate_future_predictions(symbol, n_future=20, seq_len=240):
    """
    Load the saved model for `symbol`, forecast `n_future` steps and plot them.

    Parameters:
    - symbol: label used to locate "Generated{symbol}_improved_lstm_best.keras"
      and "Generated{symbol} test.csv"
    - n_future: number of future steps to forecast (at least 1)
    - seq_len: window length fed to the model; must match the length the
      model was trained with (240 in this notebook)

    Returns:
    - (future_prices, future_dates) on success
    - (None, None) if anything fails; the error is printed, not raised

    Notes:
    - Timestamps are synthetic: they are spaced one hour apart around the
      current wall-clock time and are not read from the CSV.
    """
    try:
        if n_future < 1:
            raise ValueError("n_future must be at least 1")

        # Load the trained model
        model = load_model(f"Generated{symbol}_improved_lstm_best.keras")

        # Load the data that seeds the forecast (the "test" CSV for this symbol)
        source_df = pd.read_csv(f"Generated{symbol} test.csv")

        # Same feature order as training; close must stay last
        features = ['open', 'high', 'low', 'close']
        data = source_df[features].values

        if len(data) < seq_len:
            raise ValueError(
                f"Need at least {seq_len} rows to build the input window, got {len(data)}"
            )

        # Scale the data
        # NOTE(review): this scaler is fit on the CSV loaded here, not on the
        # data the model was trained with, so the scaling may differ from what
        # the model saw during training — confirm whether the training scaler
        # should be reused instead.
        scaler = MinMaxScaler()
        data_scaled = scaler.fit_transform(data)

        # Get feature means for synthetic data generation
        feature_means = np.mean(data_scaled, axis=0)

        # Only the last window is needed for forecasting; building every
        # training sequence here would allocate a large unused tensor.
        last_sequence = data_scaled[-seq_len:]

        print(f"Generating {n_future} future predictions for {symbol}...")

        # Generate future predictions
        future_prices = predict_future(model, last_sequence, scaler, n_future, feature_means)

        # Create future dates (assuming hourly data)
        one_hour = pd.Timedelta(hours=1)
        last_date = pd.Timestamp.now()
        future_dates = [last_date + one_hour * (i + 1) for i in range(n_future)]

        # Plot results
        plt.figure(figsize=(15, 8))

        # Plot historical data (up to the last 100 points for clarity). The
        # number of dates follows the number of prices so the two series
        # always have equal length, and a Timedelta frequency avoids the
        # deprecated 'H' alias.
        historical_prices = data[-100:, -1]
        historical_dates = pd.date_range(
            end=last_date, periods=len(historical_prices), freq=one_hour
        )

        plt.subplot(2, 1, 1)
        plt.plot(historical_dates, historical_prices, label='Historical Close Price', linewidth=2)
        plt.title(f'{symbol} - Historical Close Price (Last 100 hours)')
        plt.legend()
        plt.grid(True)

        plt.subplot(2, 1, 2)
        plt.plot(future_dates, future_prices, 'r-', label='Predicted Future Prices', linewidth=2, marker='o')
        plt.title(f'{symbol} - Predicted Future Close Prices (Next {n_future} hours)')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.legend()
        plt.grid(True)
        plt.xticks(rotation=45)

        plt.tight_layout()
        plt.show()

        # Print predictions
        print(f"\nFuture Predictions for {symbol}:")
        for i, (date, price) in enumerate(zip(future_dates, future_prices)):
            print(f"Step {i+1}: {date.strftime('%Y-%m-%d %H:%M')} - ${price:.5f}")

        # Calculate prediction statistics relative to the last observed close
        current_price = data[-1, -1]
        predicted_change = ((future_prices[-1] - current_price) / current_price) * 100

        print(f"\nPrediction Statistics:")
        print(f"Current Price: ${current_price:.5f}")
        print(f"Final Predicted Price: ${future_prices[-1]:.5f}")
        print(f"Predicted Change: {predicted_change:+.2f}%")
        print(f"Predicted High: ${np.max(future_prices):.5f}")
        print(f"Predicted Low: ${np.min(future_prices):.5f}")

        return future_prices, future_dates

    except Exception as e:
        # Best-effort by design: report the failure and signal it with Nones
        print(f"Error generating predictions for {symbol}: {str(e)}")
        return None, None

# Enhanced training function with future prediction capability
def train_model_with_future_prediction(symbol, n_future=20):
    """
    Train the LSTM for `symbol`, then forecast `n_future` steps.

    Parameters:
    - symbol: label used to locate "Generated{symbol} dbot.csv" and to name
      the checkpoint "Generated{symbol}_improved_lstm_best.keras"
    - n_future: number of future steps passed to generate_future_predictions

    Returns:
    - (model, future_prices, future_dates) on success
    - (None, None, None) if anything fails; the error is printed, not raised

    Side effects:
    - writes the fitted scaler to "scaler.joblib" (the name is not
      symbol-specific, so each run overwrites the previous file)
    - writes the best checkpoint (lowest val_loss) to disk
    """
    try:
        # Load data; close must stay last because it is the prediction target
        train_df = pd.read_csv(f"Generated{symbol} dbot.csv")
        features = ['open', 'high', 'low', 'close']
        data = train_df[features].values

        # Scale data
        # NOTE: the scaler is fit on every row before the train/validation
        # split below, so validation rows influence the scaling range.
        scaler = MinMaxScaler()
        data_scaled = scaler.fit_transform(data)
        joblib.dump(scaler, "scaler.joblib")

        # Create sequences for multi-step prediction
        seq_len = 240
        pred_steps = 5  # Predict 5 steps ahead during training

        X, y = create_sequences_multivariate(data_scaled, seq_len, pred_steps)

        # Fail early with a clear message instead of an opaque Keras shape
        # error: at least one training and one validation sample are needed.
        if len(X) < 2:
            raise ValueError(
                f"Not enough data: {len(data_scaled)} rows yield {len(X)} sequence(s); "
                f"need at least {seq_len + pred_steps + 1} rows"
            )

        # Chronological 80/20 split (no shuffling, so validation is the most recent data)
        split_idx = int(0.8 * len(X))
        X_train, X_val = X[:split_idx], X[split_idx:]
        y_train, y_val = y[:split_idx], y[split_idx:]

        print(f"Training data shape: {X_train.shape}")
        print(f"Validation data shape: {X_val.shape}")

        # Build and train model
        model = build_improved_model(seq_len, len(features), pred_steps)

        callbacks = [
            tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, verbose=2),
            tf.keras.callbacks.ReduceLROnPlateau(patience=5, factor=0.5, verbose=2),
            tf.keras.callbacks.ModelCheckpoint(
                f"Generated{symbol}_improved_lstm_best.keras",
                save_best_only=True,
                monitor='val_loss',
                verbose=2
            )
        ]

        model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            callbacks=callbacks,
            verbose=1
        )

        # Generate future predictions; this reloads the checkpoint written
        # above and prepares its own input window from the symbol's CSV.
        future_prices, future_dates = generate_future_predictions(symbol, n_future)

        return model, future_prices, future_dates

    except Exception as e:
        # Best-effort by design: report the failure and signal it with Nones
        print(f"Error in training pipeline for {symbol}: {str(e)}")
        return None, None, None



In [5]:
# Driver cell: choose a symbol, run the pipeline, and persist the forecast.
symbol = "GBPUSD"  # Change this to your desired symbol

# Option 1 (currently disabled): forecast from an already trained checkpoint
#print("=== Generating Future Predictions ===")
#future_prices, future_dates = generate_future_predictions(symbol, n_future=30)

# Option 2 (currently active): retrain the model, then forecast
print("=== Retraining Model and Generating Predictions ===")
model, future_prices, future_dates = train_model_with_future_prediction(
    symbol,
    n_future=30,
)

# The pipeline signals failure with None, so only write a CSV for real results.
if future_prices is not None:
    predictions_df = pd.DataFrame(
        {'datetime': future_dates, 'predicted_close': future_prices}
    )
    predictions_df.to_csv(f"Generated{symbol}_future_predictions.csv", index=False)
    print(f"\nPredictions saved to: Generated{symbol}_future_predictions.csv")

=== Retraining Model and Generating Predictions ===
Training data shape: (79804, 240, 4)
Validation data shape: (19951, 240, 4)


✓ Model architecture saved as: model_architecture_GBPUSD.png
Epoch 1/100
[1m2494/2494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 53ms/step - loss: 0.0299 - mae: 0.1059 - mape: 17.1840 - mse: 0.0299 - val_loss: 3.5991e-04 - val_mae: 0.0162 - val_mape: 5998.0654 - val_mse: 3.5991e-04 - learning_rate: 0.0010
Epoch 2/100
[1m2494/2494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 53ms/step - loss: 0.0018 - mae: 0.0314 - mape: 5.6534 - mse: 0.0018 - val_loss: 8.0965e-04 - val_mae: 0.0257 - val_mape: 6343.9634 - val_mse: 8.0965e-04 - learning_rate: 0.0010
Epoch 3/100
[1m2494/2494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 53ms/step - loss: 0.0014 - mae: 0.0276 - mape: 5.3404 - mse: 0.0014 - val_loss: 0.0031 - val_mae: 0.0517 - val_mape: 10150.8916 - val_mse: 0.0031 - learning_rate: 0.0010
Epoch 4/100
[1m2494/2494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 53ms/step - loss: 0.0012 - mae: 0.0264 - mape: 5.1306 - mse: 0.0012 - val_loss: 0.0027