# XAUUSD Prediction Bot Analysis (Optimized v2)
This notebook uses the **Enhanced Hybrid LSTM-ARIMA** model with:
- OHLCV multi-feature input
- Improved LSTM architecture (128/64 units, BatchNorm, Early Stopping)
- Better visualization with historical context

> **Make sure to run this with the 'Python (XAUUSD Venv)' kernel!**

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Make the current working directory importable so the `src.*` imports
# further down in this cell resolve. The membership check keeps a re-run of
# this cell from stacking duplicate entries on sys.path.
cwd_path = os.path.abspath('')
if cwd_path not in sys.path:
    sys.path.append(cwd_path)

# Configure GPU if available
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Ask TensorFlow to grow GPU memory on demand for every visible device.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"[INFO] GPU Enabled: {len(gpus)} device(s)")
    except RuntimeError as e:
        # Report a RuntimeError from the memory-growth setup instead of
        # aborting the cell.
        print(e)
else:
    print("[INFO] No GPU found, using CPU.")

from src.data_loader import DataLoader
from src.hybrid_model import HybridModel
from src.utils import calculate_metrics, get_steps_for_days

[INFO] No GPU found, using CPU.


## 1. Configuration
**Change these values to customize your analysis.**

In [2]:
# ----------------------------------------------------------------------
# Notebook settings: every value a reader is expected to tune lives here.
# ----------------------------------------------------------------------

# Market data selection
SYMBOL = 'XAUUSD'    # asset symbol
TIMEFRAME = '1h'     # candle size: '1h', '30m' or '15m'
PERIOD = '5y'        # amount of history to request: '1y', '2y' or '5y'
FUTURE_DAYS = 5      # forecast horizon in days

# Training hyper-parameters
EPOCHS = 50               # epoch count passed to model.train
LOOK_BACK = 90            # number of past steps in each input window
USE_MULTIVARIATE = True   # True: OHLCV (5 features); False: Close only

## 2. Load Data

In [3]:
print(f"Fetching {SYMBOL} data ({TIMEFRAME})...")

# CSV location handed to the loader as `data_path`; only the timeframe is
# interpolated into the file name.
csv_path = f'data/gold_{TIMEFRAME}.csv'
loader = DataLoader(symbol=SYMBOL, interval=TIMEFRAME, period=PERIOD, data_path=csv_path)

# Request the 'mt5' source. Any fallback to yfinance would happen inside
# DataLoader.fetch_data, which is not visible from this notebook - TODO confirm.
df = loader.fetch_data(source='mt5')
print(f"Loaded {len(df)} rows")

# Last expression of the cell: rich display of the most recent rows.
df.tail()

Fetching XAUUSD data (1h)...
Loading data from data/gold_1h.csv
Loaded 11478 rows


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-12-05 17:00:00+00:00,4244.0,4251.0,4236.0,4248.399902,10239
2025-12-05 18:00:00+00:00,4248.200195,4250.299805,4236.899902,4239.299805,9449
2025-12-05 19:00:00+00:00,4239.299805,4241.100098,4205.100098,4237.299805,4787
2025-12-05 20:00:00+00:00,4237.600098,4237.600098,4229.0,4231.600098,5054
2025-12-05 21:00:00+00:00,4231.399902,4231.700195,4194.5,4212.899902,3823


## 3. Preprocess

In [4]:
# Build the LSTM windows with the loader routine matching the configured
# feature mode. Both routines return (X, y, scaled_data).
if not USE_MULTIVARIATE:
    print("Using UNIVARIATE mode (Close only)")
    X, y, scaled_data = loader.prepare_data_for_lstm(df, look_back=LOOK_BACK)
else:
    print("Using MULTIVARIATE mode (OHLCV features)")
    X, y, scaled_data = loader.prepare_data_for_lstm_multivariate(df, look_back=LOOK_BACK)

# Order-preserving 80/20 split: the first 80% of the windows train the model,
# the remaining 20% are held out for evaluation.
train_size = int(0.8 * len(X))
X_train = X[:train_size]
X_test = X[train_size:]
y_train = y[:train_size]
y_test = y[train_size:]

print(f"Input shape: {X.shape}")
print(f"Train: {X_train.shape[0]}, Test: {X_test.shape[0]}")

Using MULTIVARIATE mode (OHLCV features)
Input shape: (11388, 90, 5)
Train: 9110, Test: 2278


## 4. Train Hybrid Model

In [5]:
# Seed the global NumPy and TensorFlow random generators right before the
# model is built, so re-running this cell starts from the same random state
# each time. NOTE(review): this only helps as far as HybridModel draws from
# these global generators - its internals are not visible here.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# The model takes the per-sample shape of X: (X.shape[1], X.shape[2]).
input_shape = (X.shape[1], X.shape[2])
model = HybridModel(input_shape)

print(f"Training with {EPOCHS} epochs...")
model.train(X_train, y_train, epochs=EPOCHS)

Training with 50 epochs...
Training LSTM part (input: 90 steps x 5 features)...
Epoch 1/50


  super().__init__(**kwargs)


[1m285/285[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 48ms/step - loss: 0.0947 - learning_rate: 5.0000e-04
Epoch 2/50
[1m174/285[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m5s[0m 52ms/step - loss: 0.0435

KeyboardInterrupt: 

## 5. Evaluate (Test Set)

In [None]:
# Predict on the held-out windows; the model returns the combined forecast
# together with its LSTM and ARIMA parts.
final_preds, lstm_preds, arima_preds = model.predict(X_test)

# Score the combined forecast against the targets (both flattened to 1-D).
metrics = calculate_metrics(y_test.flatten(), final_preds.flatten())
print("\n=== Hybrid Model Metrics ===")
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name}: {metric_value:.6f}")

# Visualize actual vs. predicted values over the test set
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(y_test, label='Actual', color='black', linewidth=1)
ax.plot(final_preds, label='Hybrid Prediction', color='red', linestyle='--', alpha=0.8)
ax.set_title(f'{SYMBOL} {TIMEFRAME} - Test Set Evaluation')
ax.set_xlabel('Time Steps')
ax.set_ylabel('Price (Normalized)')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 6. Future Prediction (with Historical Context)

In [None]:
# Forecast FUTURE_DAYS ahead and plot the result after the most recent
# historical closes. Skipped entirely when FUTURE_DAYS is not positive.
if FUTURE_DAYS > 0:
    steps = get_steps_for_days(TIMEFRAME, FUTURE_DAYS)
    print(f"Predicting next {FUTURE_DAYS} days -> {steps} candles ({TIMEFRAME})")

    # Seed window: the last LOOK_BACK rows of the full scaled data, reshaped to
    # (1, LOOK_BACK, n_features). A 1-D scaled series counts as one feature.
    n_features = scaled_data.shape[1] if len(scaled_data.shape) > 1 else 1
    last_sequence = scaled_data[-LOOK_BACK:].reshape(1, LOOK_BACK, n_features)

    # For multivariate, Close is at index 3
    # NOTE(review): depends on the column order used by the loader - confirm.
    close_idx = 3 if USE_MULTIVARIATE else 0

    future_final, future_lstm, future_arima = model.predict_future(
        last_sequence, steps, close_feature_idx=close_idx
    )

    # Inverse Scale to get real prices
    if USE_MULTIVARIATE:
        future_prices = loader.inverse_transform_close(future_final)
    else:
        future_prices = loader.inverse_transform(future_final)

    # Flatten once and use the same 1-D series for both the plot and the
    # summary, so the cell works whether the inverse transform returns a
    # (steps,) or a (steps, 1) array.
    future_curve = np.asarray(future_prices).reshape(-1)

    # Historical context: up to the last 200 candles. Clamped to the available
    # history so the x-range always matches the number of prices plotted.
    context_size = min(200, len(df))
    last_actual_prices = df['Close'].values[-context_size:]

    fig, ax = plt.subplots(figsize=(14, 6))

    # Historical (black)
    x_history = range(context_size)
    ax.plot(x_history, last_actual_prices, label='Historical', color='black', linewidth=1)

    # Future prediction (green) - continues from historical
    x_future = range(context_size, context_size + steps)
    ax.plot(x_future, future_curve, label=f'Future {FUTURE_DAYS} Days',
            color='green', linewidth=2)

    # Mark transition point
    ax.axvline(x=context_size, color='blue', linestyle=':', alpha=0.5, label='Forecast Start')

    ax.set_title(f'{SYMBOL} - Historical + Future Forecast ({FUTURE_DAYS} days)')
    ax.set_xlabel('Time Steps')
    ax.set_ylabel('Price (USD)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

    # Print summary
    last_close = last_actual_prices[-1]
    print("\n=== Future Prediction Summary ===")
    print(f"Last Historical Price: ${last_close:.2f}")
    print(f"First Future Price:    ${future_curve[0]:.2f}")
    print(f"Final Future Price:    ${future_curve[-1]:.2f}")
    print(f"Price Change:          ${future_curve[-1] - last_close:.2f}")