In [ ]:
# Set conda environment for proper GPU support
import os
os.environ['CONDA_DEFAULT_ENV'] = 'trading-env'

# Configure GPU
import tensorflow as tf

def configure_gpu():
    """Configure TensorFlow for optimal GPU usage."""
    print("🔧 Configuring GPU settings...")
    
    gpus = tf.config.list_physical_devices('GPU')
    
    if gpus:
        try:
            print(f"🎮 Found {len(gpus)} GPU(s):")
            for i, gpu in enumerate(gpus):
                print(f"  GPU {i}: {gpu}")
            
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
                print(f"  ✅ Memory growth enabled for {gpu}")
            
            policy = tf.keras.mixed_precision.Policy('mixed_float16')
            tf.keras.mixed_precision.set_global_policy(policy)
            print("  ✅ Mixed precision enabled (float16)")
            
            print(f"  ✅ GPU acceleration: {tf.test.is_gpu_available()}")
            print(f"  ✅ GPU device name: {tf.test.gpu_device_name()}")
            
            return True
            
        except RuntimeError as e:
            print(f"  ❌ GPU setup failed: {e}")
            return False
    else:
        print("  ⚠️ No GPUs found, using CPU")
        return False

def verify_gpu_usage():
    """Verify that TensorFlow is actually using GPU."""
    print("\n🔍 GPU Usage Verification:")
    
    with tf.device('/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'):
        a = tf.random.normal([1000, 1000])
        b = tf.random.normal([1000, 1000])
        c = tf.matmul(a, b)
        
        print(f"  Test computation device: {c.device}")
        print(f"  GPU available: {tf.config.list_physical_devices('GPU')}")
        
    if tf.config.list_physical_devices('GPU'):
        gpu_details = tf.config.experimental.get_device_details(tf.config.list_physical_devices('GPU')[0])
        print(f"  GPU details: {gpu_details}")

gpu_available = configure_gpu()
verify_gpu_usage()

if gpu_available:
    print("\n⚡ GPU Optimization Settings Applied:")
    print("  - Memory growth enabled")
    print("  - Mixed precision training (float16)")
    print("  - GPU device verification completed")
    
    tf.config.optimizer.set_jit(True)
    print("  - XLA compilation enabled")
else:
    print("\n🖥️ CPU Optimization Settings:")
    tf.config.threading.set_intra_op_parallelism_threads(0)
    tf.config.threading.set_inter_op_parallelism_threads(0)
    print("  - Multi-threading enabled for CPU")

# 📈 Clean CNN-LSTM Forex Trading Strategy

This notebook implements a clean, production-ready CNN+LSTM hybrid model for forex price direction prediction using technical indicators and relative currency strength (RCS).

## Overview
- **Architecture**: CNN layers for feature extraction + LSTM for temporal patterns
- **Features**: Technical indicators (RSI, MACD, ATR, etc.) + Relative Currency Strength
- **Target**: Binary classification (price direction prediction)
- **Data Source**: MetaTrader 5 or Yahoo Finance
- **Export**: Trained models in H5 and ONNX formats

## Requirements
- Python 3.8+
- TensorFlow 2.x
- scikit-learn
- pandas, numpy
- ta (technical analysis library)
- matplotlib, seaborn

## 1. Configuration and Setup

In [1]:
# Configuration parameters
SYMBOLS = ['EURUSD', 'GBPUSD']  # Main trading pairs to predict
ALL_SYMBOLS = ["EURUSD", "GBPUSD", "USDJPY", "AUDUSD", "USDCAD", "EURJPY", "GBPJPY"]  # For RCS calculation
LOOKBACK_WINDOW = 20  # Number of time steps for sequence input
TEST_SIZE = 0.2  # Proportion of data for testing
VALIDATION_SIZE = 0.15  # Proportion of training data for validation

# Model parameters
EPOCHS = 100
BATCH_SIZE = 32
EARLY_STOPPING_PATIENCE = 10
DROPOUT_RATE = 0.3

# Data source configuration
PROVIDER = "metatrader"  # or "yahoo"
BROKER = "amp_global"
INTERVAL = "H1"

print("✅ Configuration set")
print(f"Target symbols: {SYMBOLS}")
print(f"Lookback window: {LOOKBACK_WINDOW} periods")
print(f"Data provider: {PROVIDER}")

✅ Configuration set
Target symbols: ['EURUSD', 'GBPUSD']
Lookback window: 20 periods
Data provider: metatrader


## 2. Import Libraries and Enable GPU

In [2]:
# Core libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Machine learning libraries
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Deep learning libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Input, Conv1D, LSTM, Dense, Dropout, 
    BatchNormalization, concatenate
)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l1_l2

# Technical analysis
import ta
from ta.volatility import BollingerBands, AverageTrueRange
from ta.trend import ADXIndicator, MACD, CCIIndicator
from ta.momentum import StochasticOscillator, ROCIndicator, RSIIndicator

# Local modules (ensure these exist in your src/ directory)
from src.utils.gpu_config import configure_tensorflow_gpu
from src.data.loader import load_or_fetch, load_metatrader_data

# Configure GPU if available
gpu_configured = configure_tensorflow_gpu()
if gpu_configured:
    print("🎮 GPU acceleration enabled!")
else:
    print("🖥️ Using CPU fallback")

print("✅ Libraries imported successfully")

✅ GPU configuration utilities loaded
⚠️ No GPU devices found
🖥️ Using CPU fallback
✅ Libraries imported successfully


## 3. Data Loading and Preprocessing

Load OHLC data for all currency pairs to calculate relative currency strength and technical indicators.

In [3]:
def load_forex_data(symbols, provider="metatrader", broker="amp_global", interval="H1"):
    """
    Load forex data for multiple symbols and create a MultiIndex DataFrame.
    
    Args:
        symbols: List of symbol names (e.g., ['EURUSD', 'GBPUSD'])
        provider: Data provider ('metatrader' or 'yahoo')
        broker: Broker name for MetaTrader data
        interval: Time interval (e.g., 'H1', 'M15')
    
    Returns:
        pandas.DataFrame: MultiIndex DataFrame with (symbol, field) columns
    """
    data = {}
    
    for symbol in symbols:
        print(f"📥 Loading data for {symbol}...")
        
        try:
            if provider == "metatrader":
                df = load_or_fetch(
                    symbol=symbol,
                    provider=provider,
                    loader_func=load_metatrader_data,
                    api_key="",
                    interval=interval,
                    broker=broker,
                    force_refresh=False
                )
            else:
                # Yahoo Finance fallback
                import yfinance as yf
                ticker = f"{symbol}=X" if len(symbol) == 6 else symbol
                df = yf.download(ticker, period="2y", interval="1h")
                df = df.reset_index()
                df.columns = ['time', 'open', 'high', 'low', 'close', 'volume']
                df['tick_volume'] = df['volume']
            
            # Standardize column names and data structure
            required_cols = ['time', 'open', 'high', 'low', 'close', 'tick_volume']
            if all(col in df.columns for col in required_cols):
                df = df[required_cols].dropna()
                df['time'] = pd.to_datetime(df['time'])
                df = df.set_index('time')
                
                # Add to MultiIndex structure
                for col in ['open', 'high', 'low', 'close', 'tick_volume']:
                    data[(symbol, col)] = df[col]
                    
                print(f"✅ Loaded {len(df)} records for {symbol}")
            else:
                print(f"⚠️ Missing required columns for {symbol}")
                
        except Exception as e:
            print(f"❌ Failed to load {symbol}: {str(e)}")
            continue
    
    if not data:
        raise ValueError("No data loaded successfully")
    
    prices_df = pd.DataFrame(data)
    print(f"\n📊 Final dataset shape: {prices_df.shape}")
    print(f"Date range: {prices_df.index.min()} to {prices_df.index.max()}")
    
    return prices_df

# Load data for all symbols
print("🔄 Loading forex data...")
prices = load_forex_data(
    symbols=ALL_SYMBOLS,
    provider=PROVIDER,
    broker=BROKER,
    interval=INTERVAL
)

print("\n📈 Sample data:")
print(prices.head())

🔄 Loading forex data...
📥 Loading data for EURUSD...
🔄 Fetching fresh data from metatrader API
📦 Saved to data/metatrader_EURUSD.parquet and data/metatrader_EURUSD.h5
⚠️ Missing required columns for EURUSD
📥 Loading data for GBPUSD...
🔄 Fetching fresh data from metatrader API
📦 Saved to data/metatrader_GBPUSD.parquet and data/metatrader_GBPUSD.h5
⚠️ Missing required columns for GBPUSD
📥 Loading data for USDJPY...
🔄 Fetching fresh data from metatrader API
📦 Saved to data/metatrader_USDJPY.parquet and data/metatrader_USDJPY.h5
⚠️ Missing required columns for USDJPY
📥 Loading data for AUDUSD...
🔄 Fetching fresh data from metatrader API
📦 Saved to data/metatrader_AUDUSD.parquet and data/metatrader_AUDUSD.h5
⚠️ Missing required columns for AUDUSD
📥 Loading data for USDCAD...
🔄 Fetching fresh data from metatrader API
📦 Saved to data/metatrader_USDCAD.parquet and data/metatrader_USDCAD.h5
⚠️ Missing required columns for USDCAD
📥 Loading data for EURJPY...
🔄 Fetching fresh data from metatrader

ValueError: No data loaded successfully

## 4. Feature Engineering

Calculate technical indicators and relative currency strength for enhanced predictive power.

In [4]:
def calculate_relative_currency_strength(prices_df):
    """
    Calculate Relative Currency Strength (RCS) from forex pair log returns.
    
    Args:
        prices_df: MultiIndex DataFrame with (symbol, 'close') columns
    
    Returns:
        pandas.DataFrame: RCS values for each currency
    """
    print("🧮 Calculating Relative Currency Strength...")
    
    # Extract close prices and calculate log returns
    close_prices = {}
    for symbol in ALL_SYMBOLS:
        if (symbol, 'close') in prices_df.columns:
            close_prices[symbol] = prices_df[(symbol, 'close')]
    
    close_df = pd.DataFrame(close_prices)
    log_returns = np.log(close_df / close_df.shift(1)).dropna()
    
    # Extract unique currencies
    currencies = list(set([s[:3] for s in log_returns.columns] + [s[3:6] for s in log_returns.columns]))
    
    # Calculate RCS
    rcs_data = {c: [] for c in currencies}
    
    for i in range(len(log_returns)):
        row = log_returns.iloc[i]
        daily_strength = {c: 0 for c in currencies}
        counts = {c: 0 for c in currencies}
        
        for pair, ret in row.items():
            if pd.notna(ret):
                base, quote = pair[:3], pair[3:]
                daily_strength[base] += ret
                daily_strength[quote] -= ret
                counts[base] += 1
                counts[quote] += 1
        
        for c in currencies:
            avg_strength = daily_strength[c] / counts[c] if counts[c] > 0 else 0
            rcs_data[c].append(avg_strength)
    
    rcs_df = pd.DataFrame(rcs_data, index=log_returns.index)
    print(f"✅ RCS calculated for {len(currencies)} currencies")
    
    return rcs_df

def calculate_technical_indicators(prices_df, symbol):
    """
    Calculate comprehensive technical indicators for a given symbol.
    
    Args:
        prices_df: MultiIndex DataFrame with OHLC data
        symbol: Symbol name (e.g., 'EURUSD')
    
    Returns:
        pandas.DataFrame: Technical indicators
    """
    print(f"📊 Calculating technical indicators for {symbol}...")
    
    # Extract OHLC data
    ohlc_data = {}
    for field in ['open', 'high', 'low', 'close']:
        if (symbol, field) in prices_df.columns:
            ohlc_data[field] = prices_df[(symbol, field)]
        else:
            # Fallback to close price if field missing
            ohlc_data[field] = prices_df[(symbol, 'close')]
    
    close = ohlc_data['close']
    high = ohlc_data['high']
    low = ohlc_data['low']
    
    indicators = pd.DataFrame(index=close.index)
    
    # Momentum indicators
    indicators['rsi'] = RSIIndicator(close=close, window=14).rsi()
    indicators['roc'] = ROCIndicator(close=close, window=10).roc()
    indicators['momentum'] = close.pct_change(periods=10)
    
    # Trend indicators
    macd = MACD(close=close)
    indicators['macd'] = macd.macd()
    indicators['macd_signal'] = macd.macd_signal()
    indicators['macd_histogram'] = macd.macd_diff()
    
    indicators['cci'] = CCIIndicator(high=high, low=low, close=close).cci()
    indicators['adx'] = ADXIndicator(high=high, low=low, close=close).adx()
    
    # Volatility indicators
    indicators['atr'] = AverageTrueRange(high=high, low=low, close=close).average_true_range()
    
    bb = BollingerBands(close=close)
    indicators['bb_upper'] = bb.bollinger_hband()
    indicators['bb_lower'] = bb.bollinger_lband()
    indicators['bb_width'] = (bb.bollinger_hband() - bb.bollinger_lband()) / bb.bollinger_mavg()
    indicators['bb_position'] = (close - bb.bollinger_lband()) / (bb.bollinger_hband() - bb.bollinger_lband())
    
    # Stochastic oscillator
    stoch = StochasticOscillator(high=high, low=low, close=close)
    indicators['stoch_k'] = stoch.stoch()
    indicators['stoch_d'] = stoch.stoch_signal()
    
    # Price-based features
    indicators['return_1h'] = close.pct_change(1)
    indicators['return_4h'] = close.pct_change(4)
    indicators['return_24h'] = close.pct_change(24)
    
    # Rolling statistics
    indicators['sma_5'] = close.rolling(window=5).mean()
    indicators['sma_20'] = close.rolling(window=20).mean()
    indicators['ema_12'] = close.ewm(span=12).mean()
    indicators['ema_26'] = close.ewm(span=26).mean()
    
    indicators['volatility_5'] = close.rolling(window=5).std()
    indicators['volatility_20'] = close.rolling(window=20).std()
    
    # Price position indicators
    indicators['price_position_5'] = (close - close.rolling(5).min()) / (close.rolling(5).max() - close.rolling(5).min())
    indicators['price_position_20'] = (close - close.rolling(20).min()) / (close.rolling(20).max() - close.rolling(20).min())
    
    # Time-based features
    indicators['hour'] = indicators.index.hour
    indicators['day_of_week'] = indicators.index.dayofweek
    indicators['month'] = indicators.index.month
    
    # Forward fill and backward fill NaN values
    indicators = indicators.ffill().bfill()
    
    print(f"✅ Calculated {len(indicators.columns)} technical indicators")
    return indicators

# Calculate RCS
rcs = calculate_relative_currency_strength(prices)

# Calculate technical indicators for target symbols
all_features = {}
for symbol in SYMBOLS:
    if (symbol, 'close') in prices.columns:
        indicators = calculate_technical_indicators(prices, symbol)
        all_features[symbol] = indicators
        print(f"📊 {symbol}: {indicators.shape[0]} rows, {indicators.shape[1]} features")

print("\n✅ Feature engineering completed")

NameError: name 'prices' is not defined

## 5. Target Variable Creation and Data Preparation

In [None]:
def create_target_variable(prices_df, symbol, prediction_horizon=1):
    """
    Create binary target variable for price direction prediction.
    
    Args:
        prices_df: MultiIndex DataFrame with price data
        symbol: Symbol name
        prediction_horizon: Number of periods ahead to predict
    
    Returns:
        pandas.Series: Binary target (1 = price up, 0 = price down)
    """
    close_prices = prices_df[(symbol, 'close')]
    future_prices = close_prices.shift(-prediction_horizon)
    target = (future_prices > close_prices).astype(int)
    return target

def prepare_features_and_target(indicators_df, rcs_df, target_series, lookback_window):
    """
    Prepare feature matrix and target for model training.
    
    Args:
        indicators_df: Technical indicators DataFrame
        rcs_df: Relative Currency Strength DataFrame
        target_series: Target variable Series
        lookback_window: Number of time steps for sequences
    
    Returns:
        tuple: (X, y, feature_names)
    """
    print("🔧 Preparing features and target...")
    
    # Combine technical indicators with RCS
    # Align indices
    common_index = indicators_df.index.intersection(rcs_df.index).intersection(target_series.index)
    
    # Select relevant RCS currencies (e.g., base and quote currencies)
    symbol_base = target_series.name[:3] if hasattr(target_series, 'name') else 'EUR'
    symbol_quote = target_series.name[3:6] if hasattr(target_series, 'name') else 'USD'
    
    rcs_features = []
    for currency in [symbol_base, symbol_quote, 'USD', 'EUR', 'GBP', 'JPY']:
        if currency in rcs_df.columns:
            rcs_features.append(f'rcs_{currency}')
    
    # Combine features
    combined_features = indicators_df.loc[common_index].copy()
    
    for i, currency in enumerate(['USD', 'EUR', 'GBP', 'JPY', 'AUD', 'CAD']):
        if currency in rcs_df.columns:
            combined_features[f'rcs_{currency}'] = rcs_df[currency].loc[common_index]
    
    # Remove any remaining NaN values
    combined_features = combined_features.ffill().bfill().dropna()
    
    # Align target with features
    final_index = combined_features.index.intersection(target_series.index)
    X_df = combined_features.loc[final_index]
    y_series = target_series.loc[final_index]
    
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_df)
    
    # Create sequences for LSTM
    X_sequences = []
    y_sequences = []
    
    for i in range(lookback_window, len(X_scaled)):
        X_sequences.append(X_scaled[i-lookback_window:i])
        y_sequences.append(y_series.iloc[i])
    
    X = np.array(X_sequences)
    y = np.array(y_sequences)
    
    feature_names = X_df.columns.tolist()
    
    print(f"✅ Prepared sequences: X shape {X.shape}, y shape {y.shape}")
    print(f"✅ Features: {len(feature_names)} total")
    
    return X, y, feature_names, scaler

# Prepare data for each target symbol
prepared_data = {}

for symbol in SYMBOLS:
    if symbol in all_features and (symbol, 'close') in prices.columns:
        print(f"\n🎯 Preparing data for {symbol}...")
        
        # Create target
        target = create_target_variable(prices, symbol)
        target.name = symbol
        
        # Prepare features
        X, y, feature_names, scaler = prepare_features_and_target(
            all_features[symbol], rcs, target, LOOKBACK_WINDOW
        )
        
        prepared_data[symbol] = {
            'X': X,
            'y': y,
            'feature_names': feature_names,
            'scaler': scaler,
            'target_distribution': pd.Series(y).value_counts(normalize=True)
        }
        
        print(f"📊 Target distribution for {symbol}:")
        print(prepared_data[symbol]['target_distribution'])

print("\n✅ Data preparation completed for all symbols")

## 6. Model Architecture Definition

In [None]:
def create_cnn_lstm_model(input_shape, num_classes=1):
    """
    Create CNN+LSTM hybrid model for forex price direction prediction.
    
    Args:
        input_shape: Shape of input sequences (timesteps, features)
        num_classes: Number of output classes (1 for binary classification)
    
    Returns:
        tensorflow.keras.Model: Compiled model
    """
    print(f"🏗️ Building CNN-LSTM model for input shape: {input_shape}")
    
    model = Sequential([
        # CNN layers for feature extraction
        Conv1D(
            filters=64, 
            kernel_size=3, 
            activation='relu',
            input_shape=input_shape,
            kernel_regularizer=l1_l2(l1=0.01, l2=0.01)
        ),
        BatchNormalization(),
        Dropout(DROPOUT_RATE),
        
        Conv1D(
            filters=32, 
            kernel_size=3, 
            activation='relu',
            kernel_regularizer=l1_l2(l1=0.01, l2=0.01)
        ),
        BatchNormalization(),
        Dropout(DROPOUT_RATE),
        
        # LSTM layer for temporal patterns
        LSTM(
            units=50, 
            return_sequences=False,
            kernel_regularizer=l1_l2(l1=0.01, l2=0.01),
            recurrent_regularizer=l1_l2(l1=0.01, l2=0.01)
        ),
        BatchNormalization(),
        Dropout(DROPOUT_RATE),
        
        # Output layer
        Dense(
            units=num_classes, 
            activation='sigmoid',
            kernel_regularizer=l1_l2(l1=0.01, l2=0.01)
        )
    ])
    
    # Compile model
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    
    print("✅ Model compiled successfully")
    return model

def create_callbacks():
    """
    Create training callbacks for model optimization.
    
    Returns:
        list: List of Keras callbacks
    """
    callbacks = [
        EarlyStopping(
            monitor='val_loss',
            patience=EARLY_STOPPING_PATIENCE,
            restore_best_weights=True,
            verbose=1
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            verbose=1
        )
    ]
    return callbacks

print("✅ Model architecture functions defined")

## 7. Model Training and Evaluation

In [None]:
def train_and_evaluate_model(X, y, symbol_name):
    """
    Train and evaluate CNN-LSTM model for a specific symbol.
    
    Args:
        X: Feature sequences
        y: Target values
        symbol_name: Name of the trading symbol
    
    Returns:
        dict: Training results including model, history, and metrics
    """
    print(f"\n🎯 Training model for {symbol_name}...")
    
    # Split data
    split_idx = int(len(X) * (1 - TEST_SIZE))
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    
    # Further split training data for validation
    val_split_idx = int(len(X_train) * (1 - VALIDATION_SIZE))
    X_train_final = X_train[:val_split_idx]
    X_val = X_train[val_split_idx:]
    y_train_final = y_train[:val_split_idx]
    y_val = y_train[val_split_idx:]
    
    print(f"📊 Data splits:")
    print(f"  Training: {X_train_final.shape[0]} samples")
    print(f"  Validation: {X_val.shape[0]} samples")
    print(f"  Test: {X_test.shape[0]} samples")
    
    # Create and train model
    model = create_cnn_lstm_model(input_shape=(X.shape[1], X.shape[2]))
    
    print("\n📋 Model Summary:")
    model.summary()
    
    # Train model
    print(f"\n🚀 Starting training for {EPOCHS} epochs...")
    history = model.fit(
        X_train_final, y_train_final,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=create_callbacks(),
        verbose=1
    )
    
    # Evaluate model
    print("\n📊 Evaluating model performance...")
    
    # Predictions
    y_pred_proba = model.predict(X_test).flatten()
    y_pred = (y_pred_proba > 0.5).astype(int)
    
    # Metrics
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"\n✅ {symbol_name} Results:")
    print(f"Test Accuracy: {accuracy:.4f}")
    print(f"\nClassification Report:")
    print(classification_report(y_test, y_pred))
    
    print(f"\nConfusion Matrix:")
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    
    # Plot training history
    plt.figure(figsize=(12, 4))
    
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'{symbol_name} - Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'{symbol_name} - Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    
    plt.tight_layout()
    plt.show()
    
    return {
        'model': model,
        'history': history,
        'X_test': X_test,
        'y_test': y_test,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba,
        'accuracy': accuracy,
        'confusion_matrix': cm
    }

# Train models for each symbol
trained_models = {}

for symbol in SYMBOLS:
    if symbol in prepared_data:
        data = prepared_data[symbol]
        results = train_and_evaluate_model(data['X'], data['y'], symbol)
        trained_models[symbol] = results
        print(f"\n✅ {symbol} training completed")

print("\n🎉 All models trained successfully!")

## 8. Backtesting and Performance Analysis

In [None]:
def run_backtest(model_results, prices_df, symbol, confidence_threshold=0.6):
    """
    Run a simple backtest using model predictions.
    
    Args:
        model_results: Dictionary containing model and predictions
        prices_df: Price data DataFrame
        symbol: Trading symbol
        confidence_threshold: Minimum prediction confidence for trades
    
    Returns:
        dict: Backtest results
    """
    print(f"\n📈 Running backtest for {symbol}...")
    
    # Get predictions and probabilities
    y_pred_proba = model_results['y_pred_proba']
    
    # Create trading signals based on confidence threshold
    signals = np.where(
        y_pred_proba > confidence_threshold, 1,  # Buy signal
        np.where(y_pred_proba < (1 - confidence_threshold), -1, 0)  # Sell or No signal
    )
    
    # Get corresponding price returns
    close_prices = prices_df[(symbol, 'close')]
    returns = close_prices.pct_change().fillna(0)
    
    # Align returns with predictions (take last N returns)
    test_returns = returns.iloc[-len(signals):].values
    
    # Calculate strategy returns
    strategy_returns = signals * test_returns
    cumulative_strategy = np.cumsum(strategy_returns)
    cumulative_benchmark = np.cumsum(test_returns)
    
    # Calculate performance metrics
    total_return = cumulative_strategy[-1]
    benchmark_return = cumulative_benchmark[-1]
    
    # Sharpe ratio (annualized)
    strategy_sharpe = np.sqrt(252 * 24) * np.mean(strategy_returns) / np.std(strategy_returns) if np.std(strategy_returns) > 0 else 0
    benchmark_sharpe = np.sqrt(252 * 24) * np.mean(test_returns) / np.std(test_returns) if np.std(test_returns) > 0 else 0
    
    # Maximum drawdown
    cumulative_max = np.maximum.accumulate(cumulative_strategy)
    drawdowns = cumulative_strategy - cumulative_max
    max_drawdown = np.min(drawdowns)
    
    # Win rate
    winning_trades = np.sum(strategy_returns > 0)
    total_trades = np.sum(signals != 0)
    win_rate = winning_trades / total_trades if total_trades > 0 else 0
    
    print(f"📊 Backtest Results for {symbol}:")
    print(f"  Total Return: {total_return:.4f} ({total_return*100:.2f}%)")
    print(f"  Benchmark Return: {benchmark_return:.4f} ({benchmark_return*100:.2f}%)")
    print(f"  Excess Return: {(total_return - benchmark_return)*100:.2f}%")
    print(f"  Strategy Sharpe: {strategy_sharpe:.4f}")
    print(f"  Benchmark Sharpe: {benchmark_sharpe:.4f}")
    print(f"  Max Drawdown: {max_drawdown:.4f} ({max_drawdown*100:.2f}%)")
    print(f"  Win Rate: {win_rate:.4f} ({win_rate*100:.2f}%)")
    print(f"  Total Trades: {total_trades}")
    
    # Plot results
    plt.figure(figsize=(12, 8))
    
    plt.subplot(2, 1, 1)
    plt.plot(cumulative_strategy, label=f'{symbol} Strategy', linewidth=2)
    plt.plot(cumulative_benchmark, label='Buy & Hold', linewidth=2, alpha=0.7)
    plt.title(f'{symbol} - Cumulative Returns Comparison')
    plt.xlabel('Time Period')
    plt.ylabel('Cumulative Return')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    plt.subplot(2, 1, 2)
    plt.plot(drawdowns, label='Drawdown', color='red', alpha=0.7)
    plt.fill_between(range(len(drawdowns)), drawdowns, 0, alpha=0.3, color='red')
    plt.title(f'{symbol} - Strategy Drawdown')
    plt.xlabel('Time Period')
    plt.ylabel('Drawdown')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    return {
        'total_return': total_return,
        'benchmark_return': benchmark_return,
        'excess_return': total_return - benchmark_return,
        'strategy_sharpe': strategy_sharpe,
        'benchmark_sharpe': benchmark_sharpe,
        'max_drawdown': max_drawdown,
        'win_rate': win_rate,
        'total_trades': total_trades,
        'signals': signals,
        'strategy_returns': strategy_returns,
        'cumulative_strategy': cumulative_strategy,
        'cumulative_benchmark': cumulative_benchmark
    }

# Run backtests for all trained models
backtest_results = {}

for symbol in trained_models:
    results = run_backtest(
        trained_models[symbol], 
        prices, 
        symbol, 
        confidence_threshold=0.7
    )
    backtest_results[symbol] = results

print("\n✅ All backtests completed")

## 9. Model Export and Saving

In [None]:
import os
from datetime import datetime

def export_model_with_metadata(model, symbol, feature_names, model_results, backtest_results, export_dir="exported_models"):
    """
    Export trained model in multiple formats with comprehensive metadata.
    
    Args:
        model: Trained Keras model
        symbol: Trading symbol
        feature_names: List of feature names
        model_results: Model training and evaluation results
        backtest_results: Backtesting results
        export_dir: Directory to save models
    
    Returns:
        dict: Export paths and metadata
    """
    print(f"\n💾 Exporting model for {symbol}...")
    
    # Create export directory if it doesn't exist
    os.makedirs(export_dir, exist_ok=True)
    
    # Generate timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name = f"{symbol}_CNN_LSTM_{timestamp}"
    
    # Export paths
    h5_path = os.path.join(export_dir, f"{base_name}.h5")
    onnx_path = os.path.join(export_dir, f"{base_name}.onnx")
    metadata_path = os.path.join(export_dir, f"{base_name}_metadata.json")
    metrics_path = os.path.join(export_dir, f"{base_name}_metrics.csv")
    
    # Save Keras model
    model.save(h5_path)
    print(f"✅ Saved H5 model: {h5_path}")
    
    # Save ONNX model
    try:
        import tf2onnx
        import onnx
        
        spec = (tf.TensorSpec(model.input.shape, tf.float32, name="input"),)
        onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13)
        onnx.save(onnx_model, onnx_path)
        print(f"✅ Saved ONNX model: {onnx_path}")
    except Exception as e:
        print(f"⚠️ ONNX export failed: {str(e)}")
        onnx_path = None
    
    # Create metadata
    metadata = {
        "model_info": {
            "symbol": symbol,
            "model_type": "CNN_LSTM",
            "timestamp": timestamp,
            "lookback_window": LOOKBACK_WINDOW,
            "num_features": len(feature_names),
            "feature_names": feature_names
        },
        "training_config": {
            "epochs": EPOCHS,
            "batch_size": BATCH_SIZE,
            "dropout_rate": DROPOUT_RATE,
            "early_stopping_patience": EARLY_STOPPING_PATIENCE
        },
        "model_performance": {
            "test_accuracy": float(model_results['accuracy']),
            "final_train_loss": float(model_results['history'].history['loss'][-1]),
            "final_val_loss": float(model_results['history'].history['val_loss'][-1]),
            "final_train_acc": float(model_results['history'].history['accuracy'][-1]),
            "final_val_acc": float(model_results['history'].history['val_accuracy'][-1])
        },
        "backtest_performance": {
            "total_return": float(backtest_results['total_return']),
            "benchmark_return": float(backtest_results['benchmark_return']),
            "excess_return": float(backtest_results['excess_return']),
            "strategy_sharpe": float(backtest_results['strategy_sharpe']),
            "max_drawdown": float(backtest_results['max_drawdown']),
            "win_rate": float(backtest_results['win_rate']),
            "total_trades": int(backtest_results['total_trades'])
        },
        "file_paths": {
            "h5_model": h5_path,
            "onnx_model": onnx_path,
            "metadata": metadata_path,
            "metrics": metrics_path
        }
    }
    
    # Save metadata
    import json
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=2)
    print(f"✅ Saved metadata: {metadata_path}")
    
    # Create metrics CSV
    metrics_df = pd.DataFrame({
        'metric': ['test_accuracy', 'total_return', 'benchmark_return', 'excess_return',
                  'strategy_sharpe', 'max_drawdown', 'win_rate', 'total_trades'],
        'value': [
            model_results['accuracy'],
            backtest_results['total_return'],
            backtest_results['benchmark_return'],
            backtest_results['excess_return'],
            backtest_results['strategy_sharpe'],
            backtest_results['max_drawdown'],
            backtest_results['win_rate'],
            backtest_results['total_trades']
        ]
    })
    metrics_df.to_csv(metrics_path, index=False)
    print(f"✅ Saved metrics: {metrics_path}")
    
    return metadata

# Export all trained models
exported_models = {}

for symbol in trained_models:
    if symbol in backtest_results:
        metadata = export_model_with_metadata(
            model=trained_models[symbol]['model'],
            symbol=symbol,
            feature_names=prepared_data[symbol]['feature_names'],
            model_results=trained_models[symbol],
            backtest_results=backtest_results[symbol]
        )
        exported_models[symbol] = metadata

print("\n🎉 All models exported successfully!")

# Summary of all results
print("\n📊 Final Summary:")
print("=" * 50)

for symbol in SYMBOLS:
    if symbol in trained_models and symbol in backtest_results:
        acc = trained_models[symbol]['accuracy']
        ret = backtest_results[symbol]['total_return']
        sharpe = backtest_results[symbol]['strategy_sharpe']
        drawdown = backtest_results[symbol]['max_drawdown']
        
        print(f"\n{symbol}:")
        print(f"  Test Accuracy: {acc:.4f}")
        print(f"  Total Return: {ret:.4f} ({ret*100:.2f}%)")
        print(f"  Sharpe Ratio: {sharpe:.4f}")
        print(f"  Max Drawdown: {drawdown:.4f} ({drawdown*100:.2f}%)")

print("\n✅ Notebook execution completed successfully!")

## 10. Feature Importance Analysis

In [None]:
def analyze_feature_importance(model, X_test, feature_names, symbol, n_repeats=5):
    """
    Analyze feature importance using permutation importance.
    
    Args:
        model: Trained model
        X_test: Test features
        feature_names: List of feature names
        symbol: Trading symbol
        n_repeats: Number of permutation repeats
    
    Returns:
        pandas.DataFrame: Feature importance scores
    """
    print(f"\n🔍 Analyzing feature importance for {symbol}...")
    
    # Calculate baseline score
    baseline_score = model.evaluate(X_test, trained_models[symbol]['y_test'], verbose=0)[1]
    
    # Calculate permutation importance
    importance_scores = []
    
    for feature_idx in range(len(feature_names)):
        scores = []
        
        for _ in range(n_repeats):
            # Create a copy and permute the feature
            X_permuted = X_test.copy()
            np.random.shuffle(X_permuted[:, :, feature_idx])
            
            # Calculate score with permuted feature
            permuted_score = model.evaluate(X_permuted, trained_models[symbol]['y_test'], verbose=0)[1]
            
            # Importance is the decrease in performance
            importance = baseline_score - permuted_score
            scores.append(importance)
        
        importance_scores.append({
            'feature': feature_names[feature_idx],
            'importance_mean': np.mean(scores),
            'importance_std': np.std(scores)
        })
    
    # Create DataFrame and sort by importance
    importance_df = pd.DataFrame(importance_scores)
    importance_df = importance_df.sort_values('importance_mean', ascending=False)
    
    # Plot feature importance
    plt.figure(figsize=(12, 8))
    
    top_features = importance_df.head(20)
    
    plt.barh(range(len(top_features)), top_features['importance_mean'])
    plt.yticks(range(len(top_features)), top_features['feature'])
    plt.xlabel('Importance Score (Accuracy Decrease)')
    plt.title(f'{symbol} - Top 20 Feature Importance')
    plt.gca().invert_yaxis()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
    
    print(f"\n📊 Top 10 Important Features for {symbol}:")
    for i, row in importance_df.head(10).iterrows():
        print(f"  {row['feature']}: {row['importance_mean']:.6f} (±{row['importance_std']:.6f})")
    
    return importance_df

# Analyze feature importance for all models
feature_importance_results = {}

for symbol in trained_models:
    if symbol in prepared_data:
        importance_df = analyze_feature_importance(
            model=trained_models[symbol]['model'],
            X_test=trained_models[symbol]['X_test'],
            feature_names=prepared_data[symbol]['feature_names'],
            symbol=symbol
        )
        feature_importance_results[symbol] = importance_df
        
        # Save feature importance to CSV
        importance_df.to_csv(f'feature_importance_{symbol}.csv', index=False)
        print(f"✅ Saved feature importance to feature_importance_{symbol}.csv")

print("\n✅ Feature importance analysis completed")

## Conclusion

This notebook provides a complete implementation of a CNN-LSTM hybrid model for forex trading strategy development. Key features include:

### ✅ Implemented Features:
- **Multi-source data loading** (MetaTrader 5, Yahoo Finance)
- **Comprehensive feature engineering** (30+ technical indicators + RCS)
- **Robust CNN-LSTM architecture** with regularization
- **Proper data splits** and validation procedures
- **Backtesting framework** with performance metrics
- **Model export** in H5 and ONNX formats
- **Feature importance analysis** using permutation importance
- **Comprehensive logging** and visualization

### 📊 Performance Metrics:
- Model accuracy on test set
- Backtesting total returns vs benchmark
- Sharpe ratio and maximum drawdown
- Win rate and trade statistics
- Feature importance rankings

### 🚀 Next Steps:
1. **Hyperparameter optimization** using Optuna or similar
2. **Advanced feature engineering** (wavelets, PCA, etc.)
3. **Ensemble methods** combining multiple models
4. **Real-time deployment** using exported ONNX models
5. **Risk management** integration (position sizing, stop-loss)

### ⚠️ Important Notes:
- This is for educational purposes - always validate thoroughly before live trading
- Consider transaction costs, slippage, and market impact
- Past performance does not guarantee future results
- Always use proper risk management in live trading

---

**Model Files Exported To:** `exported_models/`  
**Feature Importance:** `feature_importance_*.csv`  
**Metadata:** `*_metadata.json`  
**Metrics:** `*_metrics.csv`