In [ ]:
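# Record the expected conda environment name. NOTE: this only sets an
# environment variable for this process; it does not activate the environment
# or change which Python packages / GPU libraries are actually loaded.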
import os
os.environ['CONDA_DEFAULT_ENV'] = 'trading-env'

# Configure GPU
import tensorflow as tf

def configure_gpu():
    """Configure TensorFlow for GPU execution when a GPU is visible.

    Enables memory growth on every visible GPU and switches the global Keras
    mixed-precision policy to ``mixed_float16``.

    Returns:
        bool: True when at least one GPU was found and configured, False when
        no GPU is visible or when configuration failed.
    """
    print("🔧 Configuring GPU settings...")

    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        print("  ⚠️ No GPUs found, using CPU")
        return False

    print(f"🎮 Found {len(gpus)} GPU(s):")
    for i, gpu in enumerate(gpus):
        print(f"  GPU {i}: {gpu}")

    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            print(f"  ✅ Memory growth enabled for {gpu}")

        policy = tf.keras.mixed_precision.Policy('mixed_float16')
        tf.keras.mixed_precision.set_global_policy(policy)
        print("  ✅ Mixed precision enabled (float16)")
    except (RuntimeError, ValueError) as e:
        # set_memory_growth raises RuntimeError once the runtime has been
        # initialised and ValueError for an invalid device; either way the
        # GPU is not configured as requested, so report CPU-style fallback.
        print(f"  ❌ GPU setup failed: {e}")
        return False

    # `gpus` is known to be non-empty here, so reuse it instead of querying
    # the device list again.
    print(f"  ✅ GPU acceleration: {bool(gpus)}")
    print(f"  ✅ GPU device name: {gpus[0].name}")
    return True

def verify_gpu_usage():
    """Run a small matrix product and print the device it executed on.

    The computation is pinned to ``/GPU:0`` when a GPU is visible and to
    ``/CPU:0`` otherwise. When a GPU is visible its device details are
    printed as well.
    """
    print("\n🔍 GPU Usage Verification:")

    target_device = '/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'
    with tf.device(target_device):
        lhs = tf.random.normal([1000, 1000])
        rhs = tf.random.normal([1000, 1000])
        product = tf.matmul(lhs, rhs)

        print(f"  Test computation device: {product.device}")
        print(f"  GPU available: {tf.config.list_physical_devices('GPU')}")

    visible_gpus = tf.config.list_physical_devices('GPU')
    if visible_gpus:
        gpu_details = tf.config.experimental.get_device_details(visible_gpus[0])
        print(f"  GPU details: {gpu_details}")

gpu_available = configure_gpu()

# Apply runtime-level settings BEFORE running any TensorFlow op: the test
# computation in verify_gpu_usage() initialises the runtime, after which the
# threading configuration can no longer be changed (TensorFlow raises
# RuntimeError in that case).
if gpu_available:
    print("\n⚡ GPU Optimization Settings Applied:")
    print("  - Memory growth enabled")
    print("  - Mixed precision training (float16)")

    tf.config.optimizer.set_jit(True)
    print("  - XLA compilation enabled")
else:
    print("\n🖥️ CPU Optimization Settings:")
    try:
        # 0 lets TensorFlow pick an appropriate number of threads.
        tf.config.threading.set_intra_op_parallelism_threads(0)
        tf.config.threading.set_inter_op_parallelism_threads(0)
        print("  - Multi-threading enabled for CPU")
    except RuntimeError as e:
        # Runtime already initialised (e.g. the cell was re-run); keep the
        # existing threading configuration instead of aborting the cell.
        print(f"  ⚠️ Threading settings left unchanged: {e}")

verify_gpu_usage()

if gpu_available:
    print("  - GPU device verification completed")

# 📈 Clean CNN-LSTM Forex Trading Strategy (FIXED)

This notebook implements a **robust, production-ready** CNN+LSTM hybrid model for forex price direction prediction with comprehensive error handling and data validation.

## Overview
- **Architecture**: CNN layers for feature extraction + LSTM for temporal patterns
- **Features**: Technical indicators (RSI, MACD, ATR, etc.) + Relative Currency Strength
- **Target**: Binary classification (price direction prediction)
- **Data Source**: MetaTrader 5 or Yahoo Finance with fallbacks
- **Export**: Trained models in H5 and ONNX formats
- **Robustness**: Comprehensive error handling and data validation

## Fixed Issues
- ✅ Import dependency error handling
- ✅ Data loading robustness with fallbacks
- ✅ Column name standardization
- ✅ Data validation and quality checks
- ✅ Memory management
- ✅ Index alignment safety
- ✅ Function signature corrections

## 1. Configuration and Setup

In [None]:
# ---------------------------------------------------------------------------
# Notebook-wide configuration
# ---------------------------------------------------------------------------

# Pairs the model is trained to predict.
SYMBOLS = ["EURUSD", "GBPUSD"]

# Wider universe of pairs used for the Relative Currency Strength features.
ALL_SYMBOLS = [
    "EURUSD",
    "GBPUSD",
    "USDJPY",
    "AUDUSD",
    "USDCAD",
    "EURJPY",
    "GBPJPY",
]

# Sequence length and data split proportions.
LOOKBACK_WINDOW = 20      # time steps per input sequence
TEST_SIZE = 0.2           # share of all data held out for testing
VALIDATION_SIZE = 0.15    # share of the training data used for validation

# Training hyper-parameters.
EPOCHS = 100
BATCH_SIZE = 32
EARLY_STOPPING_PATIENCE = 10
DROPOUT_RATE = 0.3

# Data source selection. Yahoo is tried first; see load_forex_data for the
# fallback order.
PROVIDER = "yahoo"
BROKER = "amp_global"
INTERVAL = "H1"

# Data quality thresholds.
MIN_DATA_POINTS = 500        # minimum rows required per symbol
MIN_TRAINING_SAMPLES = 100   # minimum samples required for training
MAX_NAN_RATIO = 0.1          # maximum tolerated share of NaN cells

print(
    "\n".join(
        [
            "✅ Configuration set",
            f"Target symbols: {SYMBOLS}",
            f"Lookback window: {LOOKBACK_WINDOW} periods",
            f"Data provider: {PROVIDER}",
            f"Minimum data points required: {MIN_DATA_POINTS}",
        ]
    )
)

## 2. Import Libraries with Error Handling

In [None]:
# Core libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import logging
import gc
import os
from datetime import datetime
import json

warnings.filterwarnings('ignore')

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Machine learning libraries
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Deep learning libraries
# TensorFlow is imported inside try/except so that a missing installation is
# recorded in TF_AVAILABLE and reported by the dependency check further down.
try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential, Model
    from tensorflow.keras.layers import (
        Input, Conv1D, LSTM, Dense, Dropout, 
        BatchNormalization, concatenate
    )
    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
    from tensorflow.keras.regularizers import l1_l2
    
    # Configure TensorFlow memory growth
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            print("🎮 GPU memory growth enabled!")
        except RuntimeError as e:
            # A RuntimeError here is only reported; execution continues.
            print(f"⚠️ GPU setup warning: {e}")
    else:
        print("🖥️ No GPUs found, using CPU")
        
    TF_AVAILABLE = True
except ImportError as e:
    print(f"❌ TensorFlow import failed: {e}")
    TF_AVAILABLE = False

# Technical analysis
# The `ta` package supplies the indicator classes imported below;
# availability is recorded in TA_AVAILABLE.
try:
    import ta
    from ta.volatility import BollingerBands, AverageTrueRange
    from ta.trend import ADXIndicator, MACD, CCIIndicator
    from ta.momentum import StochasticOscillator, ROCIndicator, RSIIndicator
    TA_AVAILABLE = True
    print("✅ Technical analysis library loaded")
except ImportError as e:
    print(f"❌ Technical analysis library import failed: {e}")
    TA_AVAILABLE = False

# Data sources with fallbacks
# Both flags start as False and are switched on only when the corresponding
# import succeeds.
YFINANCE_AVAILABLE = False
METATRADER_AVAILABLE = False

try:
    import yfinance as yf
    YFINANCE_AVAILABLE = True
    print("✅ Yahoo Finance available")
except ImportError:
    print("⚠️ Yahoo Finance not available")

try:
    # Try to import local modules with fallback
    from src.data.loader import load_or_fetch, load_metatrader_data
    METATRADER_AVAILABLE = True
    print("✅ MetaTrader 5 loader available")
except ImportError:
    print("⚠️ MetaTrader 5 loader not available - using fallbacks")
    
    # Fallback implementations
    # These stubs keep the names defined so later references do not fail with
    # NameError; calling them raises NotImplementedError.
    def load_or_fetch(*args, **kwargs):
        raise NotImplementedError("MetaTrader loader not available")
    
    def load_metatrader_data(*args, **kwargs):
        raise NotImplementedError("MetaTrader loader not available")

# Check critical dependencies
# TensorFlow and `ta` are mandatory; at least one data source must be present.
if not TF_AVAILABLE:
    raise ImportError("TensorFlow is required but not available. Please install: pip install tensorflow")

if not TA_AVAILABLE:
    raise ImportError("Technical analysis library is required. Please install: pip install ta")

if not YFINANCE_AVAILABLE and not METATRADER_AVAILABLE:
    raise ImportError("No data source available. Please install: pip install yfinance")

print("\n✅ All critical libraries imported successfully")
print(f"Available data sources: Yahoo Finance: {YFINANCE_AVAILABLE}, MetaTrader: {METATRADER_AVAILABLE}")

## 3. Data Validation and Quality Checks

In [None]:
def validate_data_quality(df, symbol):
    """
    Inspect a price DataFrame and collect human-readable quality problems.

    Checks, in order: emptiness, minimum row count, NaN ratio and per-column
    NaN counts, duplicate index entries, non-positive prices and OHLC
    consistency (only when all four OHLC columns are present).

    Args:
        df: DataFrame to validate
        symbol: Symbol name (currently not used by the checks)

    Returns:
        list: Description of every problem found; empty when none
    """
    if df.empty:
        return ["DataFrame is empty"]

    problems = []
    row_count = len(df)

    # Row count
    if row_count < MIN_DATA_POINTS:
        problems.append(f"Insufficient data: {row_count} rows, need at least {MIN_DATA_POINTS}")

    # Missing values, overall and per column
    missing_per_column = df.isnull().sum()
    nan_ratio = missing_per_column.sum() / (row_count * len(df.columns))
    if nan_ratio > MAX_NAN_RATIO:
        problems.append(f"High NaN ratio: {nan_ratio:.2%} > {MAX_NAN_RATIO:.2%}")
    if missing_per_column.any():
        problems.append(f"NaN values found: {missing_per_column[missing_per_column > 0].to_dict()}")

    # Index uniqueness
    if df.index.duplicated().any():
        problems.append("Duplicate timestamps found")

    # Price sanity
    ohlc = ('open', 'high', 'low', 'close')
    present = [name for name in ohlc if name in df.columns]
    problems.extend(
        f"Non-positive values in {name}"
        for name in present
        if (df[name] <= 0).any()
    )

    # Bar consistency needs all four price columns
    if len(present) == len(ohlc):
        opens, highs, lows, closes = (df[name] for name in ohlc)
        if (highs < lows).any():
            problems.append("High < Low detected")
        if (highs < opens).any() or (highs < closes).any():
            problems.append("High < Open/Close detected")
        if (lows > opens).any() or (lows > closes).any():
            problems.append("Low > Open/Close detected")

    return problems

def clean_data(df, symbol):
    """
    Deduplicate, sort and gap-fill a raw price DataFrame.

    Steps: keep the first row for each duplicated index value, sort by index,
    drop rows that are entirely NaN, fill gaps of up to three rows forward
    and then backward, and finally drop any row that still contains NaN.

    Args:
        df: Raw DataFrame
        symbol: Symbol name (used in log messages)

    Returns:
        pandas.DataFrame: Cleaned DataFrame
    """
    logger.info(f"Cleaning data for {symbol}...")

    # First occurrence wins for repeated index values.
    unique_rows = df.loc[~df.index.duplicated(keep='first')]
    ordered = unique_rows.sort_index().dropna(how='all')

    # Short gaps only: at most three consecutive rows in each direction.
    filled = ordered.ffill(limit=3).bfill(limit=3)

    cleaned = filled.dropna()
    dropped = len(filled) - len(cleaned)
    if dropped:
        logger.warning(f"{symbol}: Removed {dropped} rows with NaN values")

    return cleaned

def clear_memory():
    """
    Run the garbage collector and, when TensorFlow is available, clear the
    Keras backend session.
    """
    gc.collect()
    if not TF_AVAILABLE:
        return
    tf.keras.backend.clear_session()

print("✅ Data validation functions defined")

## 4. Robust Data Loading with Fallbacks

In [None]:
def load_yahoo_finance_data(symbol, period="2y", interval="1h"):
    """
    Load data from Yahoo Finance with proper error handling.

    Six-letter alphabetic symbols are treated as forex pairs and requested
    with Yahoo's ``=X`` suffix. Column names are normalised to lower case,
    a ``tick_volume`` column is added, the frame is indexed by time, cleaned
    with ``clean_data`` and checked with ``validate_data_quality``.

    Args:
        symbol: Trading symbol
        period: Data period passed to ``yf.download``
        interval: Data interval passed to ``yf.download``

    Returns:
        pandas.DataFrame: OHLC data indexed by time

    Raises:
        ImportError: If yfinance is not available
        ValueError: If no data is returned, required columns are missing, or
            the cleaned data has too few rows
    """
    if not YFINANCE_AVAILABLE:
        raise ImportError("Yahoo Finance not available")

    # Format symbol for Yahoo Finance
    if len(symbol) == 6 and symbol.isalpha():  # Forex pair like EURUSD
        ticker = f"{symbol}=X"
    else:
        ticker = symbol

    logger.info(f"Loading {symbol} from Yahoo Finance as {ticker}...")

    try:
        df = yf.download(ticker, period=period, interval=interval, progress=False)

        if df.empty:
            raise ValueError(f"No data received for {ticker}")

        # Recent yfinance releases return MultiIndex columns (field, ticker)
        # even for a single ticker. Collapse them to the field level so the
        # rename below can match 'Open', 'High', ... instead of failing with
        # "Missing required columns".
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)

        # Reset index to get datetime as column
        df = df.reset_index()

        # Standardize column names
        column_mapping = {
            'Datetime': 'time',
            'Date': 'time',
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Close': 'close',
            'Volume': 'volume',
            'Adj Close': 'adj_close',
        }
        df = df.rename(columns=column_mapping)

        # Ensure required columns exist
        required_cols = ['time', 'open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Add volume if missing
        if 'volume' not in df.columns:
            df['volume'] = 0
            logger.warning(f"Volume data not available for {symbol}, using zeros")

        # Downstream code reads 'tick_volume' regardless of provider.
        df['tick_volume'] = df['volume']

        # Convert time and set as index
        df['time'] = pd.to_datetime(df['time'])
        df = df.set_index('time')

        # Clean the data
        df = clean_data(df, symbol)

        # Validate data quality; only a too-short series is treated as fatal,
        # every other issue is logged and tolerated.
        issues = validate_data_quality(df, symbol)
        if issues:
            logger.warning(f"Data quality issues for {symbol}: {issues}")
            if any("Insufficient data" in issue for issue in issues):
                raise ValueError(f"Data quality issues: {issues}")

        logger.info(f"✅ Successfully loaded {len(df)} records for {symbol}")
        return df

    except Exception as e:
        # Log with provider context, then let the caller decide on fallbacks.
        logger.error(f"Failed to load {symbol} from Yahoo Finance: {str(e)}")
        raise

def load_metatrader_data_safe(symbol, broker="amp_global", interval="H1"):
    """
    Fetch one symbol through the MetaTrader loader and normalise the result.

    The returned frame is indexed by time, always carries a ``tick_volume``
    column and has been passed through ``clean_data``.

    Args:
        symbol: Trading symbol
        broker: Broker name
        interval: Time interval

    Returns:
        pandas.DataFrame: OHLC data

    Raises:
        ImportError: If the MetaTrader loader is not available
        ValueError: If no data is returned or required columns are missing
    """
    if not METATRADER_AVAILABLE:
        raise ImportError("MetaTrader loader not available")

    logger.info(f"Loading {symbol} from MetaTrader...")

    try:
        frame = load_or_fetch(
            symbol=symbol,
            provider="metatrader",
            loader_func=load_metatrader_data,
            api_key="",
            interval=interval,
            broker=broker,
        )

        if frame.empty:
            raise ValueError(f"No data received for {symbol}")

        # Required price/time columns
        absent = [
            name
            for name in ('time', 'open', 'high', 'low', 'close')
            if name not in frame.columns
        ]
        if absent:
            raise ValueError(f"Missing required columns: {absent}")

        # Guarantee a 'tick_volume' column: keep it if present, otherwise
        # copy 'volume', otherwise use zeros.
        if 'tick_volume' not in frame.columns:
            frame['tick_volume'] = frame['volume'] if 'volume' in frame.columns else 0

        # Index by timestamp
        frame['time'] = pd.to_datetime(frame['time'])
        frame = clean_data(frame.set_index('time'), symbol)

        logger.info(f"✅ Successfully loaded {len(frame)} records for {symbol}")
        return frame

    except Exception as e:
        # Add provider context to the log and propagate to the caller.
        logger.error(f"Failed to load {symbol} from MetaTrader: {str(e)}")
        raise

def load_forex_data(symbols, provider="yahoo", broker="amp_global", interval="H1"):
    """
    Load forex data for multiple symbols with robust error handling.

    For every symbol the requested provider is tried first. If that fails,
    the other provider is tried when it is available. Symbols for which every
    attempt fails are skipped.

    Args:
        symbols: List of symbol names
        provider: Data provider ('metatrader' or 'yahoo')
        broker: Broker name for MetaTrader
        interval: Time interval

    Returns:
        pandas.DataFrame: MultiIndex DataFrame with (symbol, field) columns

    Raises:
        ValueError: If no symbol could be loaded or the combined frame is empty
    """
    logger.info(f"Loading data for {len(symbols)} symbols using {provider}...")

    data = {}
    successful_loads = 0

    for symbol in symbols:
        logger.info(f"📥 Loading data for {symbol}...")

        df = None

        # Try primary provider first
        try:
            if provider == "metatrader" and METATRADER_AVAILABLE:
                df = load_metatrader_data_safe(symbol, broker, interval)
            elif provider == "yahoo" and YFINANCE_AVAILABLE:
                df = load_yahoo_finance_data(symbol)
            else:
                raise ValueError(f"Provider {provider} not available")

        except Exception as e:
            logger.warning(f"Primary provider failed for {symbol}: {str(e)}")

            # Try fallback provider
            try:
                if provider != "yahoo" and YFINANCE_AVAILABLE:
                    logger.info(f"Trying Yahoo Finance fallback for {symbol}...")
                    df = load_yahoo_finance_data(symbol)
                elif provider != "metatrader" and METATRADER_AVAILABLE:
                    logger.info(f"Trying MetaTrader fallback for {symbol}...")
                    df = load_metatrader_data_safe(symbol, broker, interval)
                else:
                    logger.error(f"No fallback available for {symbol}")
                    continue

            except Exception as e2:
                logger.error(f"All providers failed for {symbol}: {str(e2)}")
                continue

        # If we successfully loaded data
        if df is not None and not df.empty:
            # Add to MultiIndex structure (tuple keys become column levels)
            for col in ['open', 'high', 'low', 'close', 'tick_volume']:
                if col in df.columns:
                    data[(symbol, col)] = df[col]

            successful_loads += 1
            logger.info(f"✅ Successfully loaded {len(df)} records for {symbol}")
        else:
            logger.error(f"❌ Failed to load any data for {symbol}")

    if successful_loads == 0:
        raise ValueError("No data loaded successfully for any symbol")

    if successful_loads < len(symbols):
        logger.warning(f"Only loaded {successful_loads}/{len(symbols)} symbols successfully")

    prices_df = pd.DataFrame(data)

    if prices_df.empty:
        raise ValueError("Final dataset is empty")

    # Derive the symbol list from the columns themselves. Striding by a fixed
    # five columns per symbol misreports as soon as one symbol lacks a field.
    loaded_symbols = list(dict.fromkeys(col[0] for col in prices_df.columns))

    logger.info(f"\n📊 Final dataset shape: {prices_df.shape}")
    logger.info(f"Date range: {prices_df.index.min()} to {prices_df.index.max()}")
    logger.info(f"Successful symbols: {loaded_symbols}")

    return prices_df

print("✅ Robust data loading functions defined")

## 5. Load Data with Error Handling

In [None]:
# Fetch prices for the full symbol universe, print a short summary and narrow
# SYMBOLS down to the targets that actually have close prices. Any failure is
# logged, explained to the user and re-raised.
try:
    logger.info("🔄 Starting data loading process...")

    prices = load_forex_data(
        symbols=ALL_SYMBOLS, provider=PROVIDER, broker=BROKER, interval=INTERVAL
    )

    logger.info("✅ Data loading completed successfully")

    # Preview
    print("\n📈 Sample data (first 5 rows):")
    print(prices.head())

    # Summary
    print("\n📊 Data summary:")
    print(f"Shape: {prices.shape}")
    print(f"Date range: {prices.index.min()} to {prices.index.max()}")
    print(f"Available symbols: {list(set(col[0] for col in prices.columns))}")

    # Keep only the target symbols that have a close-price column
    available_target_symbols = []
    for symbol in SYMBOLS:
        if (symbol, 'close') not in prices.columns:
            continue
        available_target_symbols.append(symbol)
        data_points = prices[(symbol, 'close')].count()
        print(f"  {symbol}: {data_points} data points")

    if not available_target_symbols:
        raise ValueError(f"None of the target symbols {SYMBOLS} are available in loaded data")

    SYMBOLS = available_target_symbols
    logger.info(f"Updated target symbols to available ones: {SYMBOLS}")

except Exception as e:
    logger.error(f"Data loading failed: {str(e)}")
    print(f"\n❌ Data loading failed: {str(e)}")
    for hint in (
        "\nPlease check:",
        "1. Internet connection",
        "2. Symbol names are correct",
        "3. Required packages are installed (yfinance, ta)",
    ):
        print(hint)
    raise

## 6. Feature Engineering with Robust Error Handling

In [ ]:
def calculate_relative_currency_strength(prices_df):
    """
    Calculate Relative Currency Strength (RCS) with robust error handling.

    For every timestamp, each pair's log return is credited to its base
    currency and debited from its quote currency; a currency's strength is
    the average of its contributions over the pairs it appears in.

    Currency columns are returned in sorted order so that the feature layout
    is identical on every run (iterating a plain ``set`` of strings depends
    on hash randomisation).

    Args:
        prices_df: MultiIndex DataFrame with (symbol, 'close') columns

    Returns:
        pandas.DataFrame: RCS values for each currency, indexed like the log
        returns. An empty DataFrame is returned when the calculation fails.
    """
    logger.info("🧮 Calculating Relative Currency Strength...")

    try:
        # Extract available close prices only
        close_prices = {}
        for col in prices_df.columns:
            if len(col) == 2 and col[1] == 'close':  # (symbol, 'close')
                symbol = col[0]
                if len(symbol) == 6 and symbol.isalpha():  # Valid forex pair
                    close_prices[symbol] = prices_df[col]

        if not close_prices:
            raise ValueError("No valid close price data available for RCS calculation")

        available_symbols = list(close_prices)
        logger.info(f"Calculating RCS for symbols: {available_symbols}")

        close_df = pd.DataFrame(close_prices)
        log_returns = np.log(close_df / close_df.shift(1)).dropna()

        if log_returns.empty:
            raise ValueError("No valid log returns calculated")

        # Unique currencies in a deterministic (sorted) order
        currencies = sorted(
            {code for pair in available_symbols for code in (pair[:3], pair[3:])}
        )
        logger.info(f"Identified currencies: {currencies}")

        # Accumulate per-currency strength column-wise instead of row by row.
        # Pairs are processed in column order, so the summation order per
        # currency matches a row-wise accumulation.
        index = log_returns.index
        strength = {c: pd.Series(0.0, index=index) for c in currencies}
        counts = {c: pd.Series(0, index=index) for c in currencies}

        for pair in log_returns.columns:
            base, quote = pair[:3], pair[3:]
            ret = log_returns[pair]
            valid = ret.notna()
            contribution = ret.where(valid, 0.0)  # missing returns add nothing
            observed = valid.astype(int)

            strength[base] = strength[base] + contribution
            strength[quote] = strength[quote] - contribution
            counts[base] = counts[base] + observed
            counts[quote] = counts[quote] + observed

        # Average over contributing pairs; a currency with no valid pair at a
        # timestamp gets a neutral 0.
        rcs_df = pd.DataFrame(
            {
                c: (strength[c] / counts[c]).where(counts[c] > 0, 0.0)
                for c in currencies
            },
            index=index,
        )

        # Validate RCS data
        if rcs_df.empty:
            raise ValueError("RCS calculation resulted in empty DataFrame")

        # Check for excessive NaN values
        nan_ratio = rcs_df.isnull().sum().sum() / (len(rcs_df) * len(rcs_df.columns))
        if nan_ratio > MAX_NAN_RATIO:
            logger.warning(f"High NaN ratio in RCS data: {nan_ratio:.2%}")

        # Clean RCS data using modern pandas methods
        rcs_df = rcs_df.ffill().bfill().fillna(0)

        logger.info(f"✅ RCS calculated successfully for {len(currencies)} currencies")
        logger.info(f"RCS data shape: {rcs_df.shape}")

        return rcs_df

    except Exception as e:
        # Deliberate best-effort: callers treat an empty frame as
        # "RCS features unavailable" and continue without them.
        logger.error(f"RCS calculation failed: {str(e)}")
        return pd.DataFrame()

def calculate_technical_indicators(prices_df, symbol):
    """
    Build the technical-indicator feature table for one symbol.

    Each indicator group is computed independently; when a group fails, a
    warning is logged and neutral fallback values are used for its columns.
    Remaining gaps are forward/backward filled and any infinities or
    leftover NaN values are replaced by 0.

    Args:
        prices_df: MultiIndex DataFrame with OHLC data
        symbol: Symbol name

    Returns:
        pandas.DataFrame: Technical indicators indexed like the close series

    Raises:
        ValueError: If the symbol has no price data or fewer than 50 closes
    """
    logger.info(f"📊 Calculating technical indicators for {symbol}...")

    try:
        # Collect OHLC series; a missing field falls back to the close price.
        close_key = (symbol, 'close')
        ohlc_data = {}
        for field in ('open', 'high', 'low', 'close'):
            if (symbol, field) in prices_df.columns:
                ohlc_data[field] = prices_df[(symbol, field)]
                continue
            if close_key not in prices_df.columns:
                raise ValueError(f"No price data available for {symbol}")
            # Reaching this point implies the missing field is not 'close'.
            ohlc_data[field] = prices_df[close_key]
            logger.warning(f"Using close price as fallback for {field} in {symbol}")

        close, high, low = (ohlc_data[key].dropna() for key in ('close', 'high', 'low'))

        # The indicators need some history to warm up.
        if len(close) < 50:
            raise ValueError(f"Insufficient data for {symbol}: only {len(close)} points")

        features = pd.DataFrame(index=close.index)

        # --- Momentum ---
        try:
            features['rsi'] = RSIIndicator(close=close, window=14).rsi()
        except Exception as e:
            logger.warning(f"RSI calculation failed for {symbol}: {e}")
            features['rsi'] = 50  # neutral

        try:
            features['roc'] = ROCIndicator(close=close, window=10).roc()
        except Exception as e:
            logger.warning(f"ROC calculation failed for {symbol}: {e}")
            features['roc'] = 0

        features['momentum'] = close.pct_change(periods=10).fillna(0)

        # --- Trend ---
        try:
            macd_calc = MACD(close=close)
            features['macd'] = macd_calc.macd()
            features['macd_signal'] = macd_calc.macd_signal()
            features['macd_histogram'] = macd_calc.macd_diff()
        except Exception as e:
            logger.warning(f"MACD calculation failed for {symbol}: {e}")
            for column in ('macd', 'macd_signal', 'macd_histogram'):
                features[column] = 0

        try:
            features['cci'] = CCIIndicator(high=high, low=low, close=close).cci()
        except Exception as e:
            logger.warning(f"CCI calculation failed for {symbol}: {e}")
            features['cci'] = 0

        try:
            features['adx'] = ADXIndicator(high=high, low=low, close=close).adx()
        except Exception as e:
            logger.warning(f"ADX calculation failed for {symbol}: {e}")
            features['adx'] = 25  # neutral

        # --- Volatility ---
        try:
            atr_calc = AverageTrueRange(high=high, low=low, close=close)
            features['atr'] = atr_calc.average_true_range()
        except Exception as e:
            logger.warning(f"ATR calculation failed for {symbol}: {e}")
            features['atr'] = close.rolling(20).std().fillna(0)

        try:
            bands = BollingerBands(close=close)
            upper_band = bands.bollinger_hband()
            lower_band = bands.bollinger_lband()
            band_range = upper_band - lower_band
            features['bb_upper'] = upper_band
            features['bb_lower'] = lower_band
            features['bb_width'] = band_range / bands.bollinger_mavg()
            features['bb_position'] = (close - lower_band) / band_range
        except Exception as e:
            logger.warning(f"Bollinger Bands calculation failed for {symbol}: {e}")
            # Manual 20-period, 2-sigma bands as a substitute.
            mid_band = close.rolling(20).mean()
            deviation = close.rolling(20).std()
            features['bb_upper'] = mid_band + 2 * deviation
            features['bb_lower'] = mid_band - 2 * deviation
            features['bb_width'] = (2 * deviation) / mid_band
            features['bb_position'] = 0.5

        # --- Stochastic oscillator ---
        try:
            stochastic = StochasticOscillator(high=high, low=low, close=close)
            features['stoch_k'] = stochastic.stoch()
            features['stoch_d'] = stochastic.stoch_signal()
        except Exception as e:
            logger.warning(f"Stochastic calculation failed for {symbol}: {e}")
            features['stoch_k'] = 50
            features['stoch_d'] = 50

        # --- Simple returns over 1, 4 and 24 bars ---
        for bars in (1, 4, 24):
            features[f'return_{bars}h'] = close.pct_change(bars).fillna(0)

        # --- Moving averages and rolling volatility ---
        features['sma_5'] = close.rolling(window=5).mean()
        features['sma_20'] = close.rolling(window=20).mean()
        features['ema_12'] = close.ewm(span=12).mean()
        features['ema_26'] = close.ewm(span=26).mean()

        features['volatility_5'] = close.rolling(window=5).std()
        features['volatility_20'] = close.rolling(window=20).std()

        # --- Position of the close inside its recent range ---
        # A zero-width range is mapped to NaN first so the division cannot
        # produce infinities; such rows get the neutral value 0.5.
        for window in (5, 20):
            range_low = close.rolling(window).min()
            range_high = close.rolling(window).max()
            span = (range_high - range_low).replace(0, np.nan)
            features[f'price_position_{window}'] = ((close - range_low) / span).fillna(0.5)

        # --- Calendar features ---
        timestamps = features.index
        features['hour'] = timestamps.hour
        features['day_of_week'] = timestamps.dayofweek
        features['month'] = timestamps.month

        # Fill warm-up gaps, then neutralise infinities and leftovers.
        features = features.ffill().bfill()
        features = features.replace([np.inf, -np.inf], np.nan).fillna(0)

        logger.info(f"✅ Calculated {len(features.columns)} technical indicators for {symbol}")
        logger.info(f"Indicators data shape: {features.shape}")

        return features

    except Exception as e:
        # Log with symbol context and let the caller decide how to proceed.
        logger.error(f"Technical indicators calculation failed for {symbol}: {str(e)}")
        raise

print("✅ Feature engineering functions defined")

## 7. Calculate Features

In [None]:
try:
    # --- Relative Currency Strength ---
    logger.info("Starting RCS calculation...")
    rcs = calculate_relative_currency_strength(prices)

    if not rcs.empty:
        logger.info(f"✅ RCS calculated successfully: {rcs.shape}")
    else:
        logger.warning("RCS calculation failed, proceeding without RCS features")
        rcs = pd.DataFrame()  # Empty RCS for fallback

    # --- Technical indicators per target symbol ---
    all_features = {}

    for symbol in SYMBOLS:
        if (symbol, 'close') not in prices.columns:
            logger.warning(f"No close price data available for {symbol}")
            continue

        try:
            logger.info(f"Calculating indicators for {symbol}...")
            indicators = calculate_technical_indicators(prices, symbol)
            all_features[symbol] = indicators
            logger.info(f"📊 {symbol}: {indicators.shape[0]} rows, {indicators.shape[1]} features")
        except Exception as e:
            # One failing symbol must not stop the remaining ones.
            logger.error(f"Failed to calculate indicators for {symbol}: {str(e)}")

    if not all_features:
        raise ValueError("No technical indicators calculated for any symbol")

    # Only symbols with a feature table remain targets from here on.
    SYMBOLS = list(all_features.keys())
    logger.info(f"Successfully processed symbols: {SYMBOLS}")

    print("\n✅ Feature engineering completed successfully")

    # --- Per-symbol summary ---
    for symbol in SYMBOLS:
        features = all_features[symbol]
        print(f"\n{symbol} features:")
        print(f"  Shape: {features.shape}")
        print(f"  Date range: {features.index.min()} to {features.index.max()}")
        print(f"  Sample features: {list(features.columns[:5])}...")

        # Quick NaN check
        nan_count = features.isnull().sum().sum()
        if nan_count > 0:
            print(f"  ⚠️ NaN values: {nan_count}")
        else:
            print(f"  ✅ No NaN values")

    if rcs.empty:
        print("\n⚠️ RCS features not available - proceeding with technical indicators only")
    else:
        print(f"\nRCS features: {list(rcs.columns)}")
        print(f"RCS shape: {rcs.shape}")

except Exception as e:
    logger.error(f"Feature engineering failed: {str(e)}")
    print(f"\n❌ Feature engineering failed: {str(e)}")
    raise

## 8. Target Variable Creation and Data Preparation

In [ ]:
def create_target_variable(prices_df, symbol, prediction_horizon=1):
    """
    Create binary target variable for price direction prediction with validation.

    The label at row t is 1 when the close ``prediction_horizon`` rows later
    is strictly greater than the close at t, and 0 otherwise (a flat or lower
    future close both map to 0). The final ``prediction_horizon`` rows have no
    future close; they are excluded from the result instead of being labelled.

    Args:
        prices_df: MultiIndex DataFrame with price data; must contain a
            ``(symbol, 'close')`` column
        symbol: Symbol name
        prediction_horizon: Number of periods ahead to predict (must be >= 1)

    Returns:
        pandas.Series: Binary integer target (1 = price up, 0 = price flat or
        down), named ``symbol`` and indexed by the rows that have a known
        future close.

    Raises:
        ValueError: If the horizon is not positive, the close column is
            missing, there are too few prices, or the target contains a
            single class only.
    """
    logger.info(f"Creating target variable for {symbol}...")

    try:
        # A zero or negative horizon would compare against the current or a
        # past price, which is not a prediction target.
        if prediction_horizon < 1:
            raise ValueError(f"prediction_horizon must be >= 1, got {prediction_horizon}")

        if (symbol, 'close') not in prices_df.columns:
            raise ValueError(f"No close price data for {symbol}")

        close_prices = prices_df[(symbol, 'close')].dropna()

        if len(close_prices) < prediction_horizon + 1:
            raise ValueError(f"Insufficient data for target creation: {len(close_prices)} points")

        future_prices = close_prices.shift(-prediction_horizon)

        # Comparing against NaN yields False, so the trailing rows (whose
        # future price is unknown) must be masked out *before* casting to int;
        # otherwise they would silently become class 0.
        has_future = future_prices.notna()
        target = (future_prices > close_prices)[has_future].astype(int)

        if target.empty:
            raise ValueError("Target variable is empty after processing")

        # Check class balance
        class_counts = target.value_counts()
        if len(class_counts) < 2:
            raise ValueError(f"Target has only one class: {class_counts.index[0]}")

        class_balance = class_counts.min() / class_counts.sum()
        logger.info(f"Target class balance for {symbol}: {class_balance:.2%} minority class")

        if class_balance < 0.05:
            logger.warning(f"Severe class imbalance detected for {symbol}: {class_counts.to_dict()}")

        target.name = symbol
        logger.info(f"✅ Target created for {symbol}: {len(target)} samples")

        return target

    except Exception as e:
        logger.error(f"Target creation failed for {symbol}: {str(e)}")
        raise

def _attach_rcs_features(combined_features, rcs_df, lookback_window):
    """Add ``rcs_<currency>`` columns to ``combined_features`` in place when RCS data overlaps enough."""
    if rcs_df is None or rcs_df.empty:
        logger.info("No RCS data available, using only technical indicators")
        return

    # Only attach RCS when it shares more than one lookback window of rows
    overlap = combined_features.index.intersection(rcs_df.index)
    if len(overlap) <= lookback_window:
        logger.warning("Insufficient RCS data overlap, skipping RCS features")
        return

    known_currencies = ('USD', 'EUR', 'GBP', 'JPY', 'AUD', 'CAD', 'CHF', 'NZD')
    available_currencies = [col for col in rcs_df.columns if col in known_currencies]

    for currency in available_currencies:
        # Reindex so rows missing from the RCS frame become NaN instead of misaligning
        combined_features[f'rcs_{currency}'] = rcs_df[currency].reindex(combined_features.index)

    logger.info(f"Added {len(available_currencies)} RCS features")


def _clean_feature_frame(X_df):
    """Return ``X_df`` with infinities removed, short gaps filled, sparse columns and incomplete rows dropped."""
    # Treat +/-inf as missing BEFORE filling: ffill/bfill only fill NaN, so an
    # untouched inf would otherwise be copied into neighbouring gaps and then
    # force all of those rows to be discarded.
    X_df = X_df.replace([np.inf, -np.inf], np.nan)

    # NOTE(review): bfill fills a gap from the next later row (in index order);
    # if the index is chronological this uses future values - confirm acceptable.
    X_df = X_df.ffill(limit=5).bfill(limit=5)

    # Keep columns whose NaN share does not exceed MAX_NAN_RATIO. ``thresh`` is
    # the minimum number of non-NaN values; ceil keeps the comparison identical
    # to the fractional bound while passing pandas the integer it documents.
    min_valid = int(np.ceil(len(X_df) - len(X_df) * MAX_NAN_RATIO))
    X_df = X_df.dropna(axis=1, thresh=min_valid)

    if X_df.shape[1] == 0:
        raise ValueError("No feature columns left after dropping columns with too many NaN values")

    # Drop rows with any remaining NaN values
    return X_df.dropna()


def _build_sequences(X_scaled, y_series, lookback_window):
    """Slice ``X_scaled`` into windows of ``lookback_window`` rows, each paired with the target of the row after it."""
    n_rows = len(X_scaled)
    if n_rows <= lookback_window:
        raise ValueError("No sequences created - check data length and lookback window")

    # Window for position ``end`` covers rows [end - lookback_window, end);
    # its label is the target at row ``end``.
    X = np.array([X_scaled[end - lookback_window:end] for end in range(lookback_window, n_rows)])
    y = y_series.iloc[lookback_window:].to_numpy(copy=True)
    return X, y


def prepare_features_and_target(indicators_df, rcs_df, target_series, lookback_window):
    """
    Prepare feature matrix and target with comprehensive validation.

    Steps: optionally append RCS columns, align features with the target on
    their common index, clean missing/infinite values, scale the features, and
    cut the scaled rows into fixed-length sequences.

    Args:
        indicators_df: Technical indicators DataFrame
        rcs_df: Relative Currency Strength DataFrame (empty or None to skip)
        target_series: Target variable Series; its ``name`` is used in log output
        lookback_window: Number of time steps for sequences

    Returns:
        tuple: (X, y, feature_names, scaler) where X has shape
        (n_sequences, lookback_window, n_features), y has shape
        (n_sequences,), feature_names lists the retained columns and scaler
        is the fitted scaler.

    Raises:
        ValueError: If inputs are empty, too little data survives alignment
            or cleaning, no feature column survives, or fewer than
            MIN_TRAINING_SAMPLES sequences result.
    """
    logger.info(f"🔧 Preparing features and target for {target_series.name}...")

    try:
        # Validate inputs
        if indicators_df.empty:
            raise ValueError("Indicators DataFrame is empty")

        if len(indicators_df) < lookback_window * 2:
            raise ValueError(f"Insufficient indicator data: {len(indicators_df)} rows, need at least {lookback_window * 2}")

        if target_series.empty:
            raise ValueError("Target series is empty")

        # Start with indicators as base features, then add RCS if available
        combined_features = indicators_df.copy()
        _attach_rcs_features(combined_features, rcs_df, lookback_window)

        # Align with target
        common_index = combined_features.index.intersection(target_series.index)

        if len(common_index) < lookback_window * 2:
            raise ValueError(f"Insufficient aligned data: {len(common_index)} points, need at least {lookback_window * 2}")

        # Select aligned data
        X_df = combined_features.loc[common_index].copy()
        y_series = target_series.loc[common_index].copy()

        # Clean features data
        logger.info("Cleaning features data...")
        initial_rows = len(X_df)
        X_df = _clean_feature_frame(X_df)
        y_series = y_series.loc[X_df.index]

        if len(X_df) != initial_rows:
            logger.warning(f"Removed {initial_rows - len(X_df)} rows with NaN values")

        if len(X_df) < lookback_window:
            raise ValueError(f"After cleaning, insufficient data: {len(X_df)} rows")

        logger.info(f"Final aligned data: {len(X_df)} rows, {len(X_df.columns)} features")

        # Scale features
        # NOTE(review): the scaler is fitted on every retained row; if a
        # train/validation split happens later, confirm this is intended.
        logger.info("Scaling features...")
        scaler = StandardScaler()

        try:
            X_scaled = scaler.fit_transform(X_df)
        except Exception as e:
            logger.error(f"Feature scaling failed: {str(e)}")
            # Try robust scaling as fallback
            from sklearn.preprocessing import RobustScaler
            scaler = RobustScaler()
            X_scaled = scaler.fit_transform(X_df)
            logger.info("Used RobustScaler as fallback")

        # Create sequences for LSTM
        logger.info(f"Creating sequences with lookback window: {lookback_window}...")
        X, y = _build_sequences(X_scaled, y_series, lookback_window)

        feature_names = X_df.columns.tolist()

        # Final validation
        if X.shape[0] < MIN_TRAINING_SAMPLES:
            raise ValueError(f"Insufficient sequences for training: {X.shape[0]}, need at least {MIN_TRAINING_SAMPLES}")

        # Check for feature variance
        feature_vars = np.var(X_scaled, axis=0)
        zero_var_features = np.sum(feature_vars == 0)
        if zero_var_features > 0:
            logger.warning(f"Found {zero_var_features} features with zero variance")

        logger.info("✅ Prepared sequences successfully:")
        logger.info(f"  X shape: {X.shape}")
        logger.info(f"  y shape: {y.shape}")
        logger.info(f"  Features: {len(feature_names)}")

        return X, y, feature_names, scaler

    except Exception as e:
        logger.error(f"Feature preparation failed: {str(e)}")
        raise

# Confirmation printed once the data preparation function definitions above have run
print("✅ Data preparation functions defined")

## 9. Prepare Data for All Symbols

In [None]:
# Build model-ready sequences for every symbol; failures are logged and skipped
prepared_data = {}
successful_preparations = 0

for symbol in SYMBOLS:
    try:
        logger.info(f"\n🎯 Preparing data for {symbol}...")

        # A symbol needs engineered features before anything else can happen
        if symbol not in all_features:
            logger.warning(f"No features available for {symbol}, skipping")
            continue

        # ...and a close-price column to derive the direction label from
        if (symbol, 'close') not in prices.columns:
            logger.warning(f"No price data available for {symbol}, skipping")
            continue

        # Label series first, then the windowed feature tensor aligned to it
        target = create_target_variable(prices, symbol)
        X, y, feature_names, scaler = prepare_features_and_target(
            all_features[symbol], rcs, target, LOOKBACK_WINDOW
        )

        # Relative class frequencies of the labels that made it into sequences
        target_distribution = pd.Series(y).value_counts(normalize=True)

        prepared_data[symbol] = dict(
            X=X,
            y=y,
            feature_names=feature_names,
            scaler=scaler,
            target_distribution=target_distribution,
        )
        successful_preparations += 1

        logger.info(f"✅ {symbol} data preparation completed")

        # Per-symbol report: tensor dimensions followed by the class split
        print(f"\n📊 {symbol} Summary:")
        print(f"  Sequences: {X.shape[0]}")
        print(f"  Timesteps: {X.shape[1]}")
        print(f"  Features: {X.shape[2]}")
        print("  Target distribution:")
        for class_val, prob in prepared_data[symbol]['target_distribution'].items():
            print(f"    Class {class_val}: {prob:.2%}")

    except Exception as exc:
        # One bad symbol must not abort the rest; record it and move on
        logger.error(f"Data preparation failed for {symbol}: {exc!s}")
        print(f"❌ Failed to prepare data for {symbol}: {exc!s}")

# Nothing to train on if every symbol was skipped or failed
if not successful_preparations:
    raise ValueError("No symbols successfully prepared for training")

# Narrow SYMBOLS to the ones that were prepared successfully
SYMBOLS = [*prepared_data]

logger.info(f"\n✅ Data preparation completed for {successful_preparations} symbols: {SYMBOLS}")
print(f"\n🎉 Successfully prepared data for {successful_preparations} symbols: {SYMBOLS}")

# Clear memory
clear_memory()