# Phase 6: Multi-Asset Scaling

## Problem & Solution
**Phase 5 achieved excellent results** but with limited data:
- Train: 9,496 samples (EURUSD only)
- Strong signal direction: **67.3%** accuracy
- Overfitting: **CRUSHED** (validation loss − training loss gap = -0.018)

**Scaling Strategy:**
1. Load **5 FX pairs** (EURUSD, GBPUSD, USDJPY, USDCAD, USDCHF)
2. Combine all data → **~50,000 samples**
3. Add asset embedding (one-hot encoding per pair)
4. Train larger model without overfitting
5. Test generalization per asset

## Expected Benefits:
- **10x more strong signals** for training
- **Cross-asset patterns** (USD strength, volatility regimes)
- **Better calibration** of heatmap predictions
- **Increased model capacity** without overfitting

---

## 1. Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
from typing import List, Tuple

# TensorFlow/Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model, callbacks, regularizers
from tensorflow.keras.optimizers import Adam

# Sklearn
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")


# Set random seeds
def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's built-in ``random`` module, NumPy and TensorFlow. The
    built-in module is included because Keras draws the default seeds of its
    initializers/dropout layers from it, so seeding NumPy and TensorFlow
    alone does not make model construction repeatable.

    Args:
        seed: Integer seed applied to all three generators.
    """
    # Local import keeps this cell self-contained (``random`` is not part of
    # the notebook's import cell).
    import random

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


set_seed(42)

## 2. Load Multi-Asset Data

Use the pre-processed `featured_labeled` datasets

In [None]:
# Input data location and output directory for all Phase 6 artefacts
DATA_DIR = Path('../DATA/4-hours Pepperstone')
PHASE6_MODEL_DIR = Path('../MODELS_PHASE6')
PHASE6_MODEL_DIR.mkdir(exist_ok=True)

# FX pairs that make up the multi-asset training set
ASSETS = ['EURUSD', 'GBPUSD', 'USDJPY', 'USDCAD', 'USDCHF']

# Read one featured/labeled CSV per pair. A pair whose file is missing is
# reported and skipped instead of aborting the whole load.
print("Loading multi-asset data...")
asset_data = {}

for asset in ASSETS:
    file_path = DATA_DIR / f'4-hours_{asset}_featured_labeled.csv'
    if not file_path.exists():
        print(f"  {asset}: FILE NOT FOUND - {file_path}")
        continue

    frame = pd.read_csv(file_path, index_col=0)
    frame['asset'] = asset  # tag every row with the pair it belongs to
    asset_data[asset] = frame
    print(f"  {asset}: {len(frame)} samples")

# Nothing loaded at all is fatal; otherwise stack the per-pair frames
# row-wise under a fresh 0..N-1 index.
if not asset_data:
    raise FileNotFoundError("No featured_labeled datasets found!")

df_combined = pd.concat(asset_data.values(), axis=0, ignore_index=True)
print(f"\nTotal combined samples: {len(df_combined)}")

## 3. Data Preprocessing

Extract features and create sliding windows with asset embeddings

In [None]:
# Feature columns (based on Phase 1-5)
FEATURE_COLS = [
    'log_return_1', 'log_return_5', 'log_return_20',
    'rsi_14', 'rsi_50',
    'rolling_mean_20', 'rolling_std_20', 'rolling_min_20', 'rolling_max_20',
    'rolling_mean_50', 'rolling_std_50',
    'ema_span_20', 'ema_span_50',
    'macd', 'macd_signal', 'macd_diff',
    'bb_upper', 'bb_middle', 'bb_lower', 'bb_width',
    'atr_14',
    'hour', 'day_of_week',
    'volume_close'
]

TARGET_COL = 'signed_heatmap_entry'

# Keep only the requested features that are actually present in the data,
# and say explicitly which ones were dropped so the gap is visible.
available_features = [col for col in FEATURE_COLS if col in df_combined.columns]
missing_features = [col for col in FEATURE_COLS if col not in df_combined.columns]
print(f"\nAvailable features: {len(available_features)}/{len(FEATURE_COLS)}")
print(f"Features: {available_features}")
if missing_features:
    print(f"Missing features (skipped): {missing_features}")

if not available_features:
    raise ValueError("None of the columns in FEATURE_COLS exist in the combined dataset.")

# Fail fast when the target is missing: every later cell indexes this column,
# so continuing would only postpone the failure to a less obvious KeyError.
if TARGET_COL not in df_combined.columns:
    print(f"\nERROR: Target column '{TARGET_COL}' not found!")
    print(f"Available columns: {df_combined.columns.tolist()}")
    raise KeyError(f"Target column '{TARGET_COL}' not found in the combined dataset")

print(f"\nTarget column '{TARGET_COL}' found.")

In [None]:
# Create asset embeddings (one-hot encoding).
# dtype=float is explicit because recent pandas versions emit boolean dummy
# columns by default; mixing bool and float columns makes `.values` fall back
# to an object array, which is slow and memory-hungry in the window builder.
asset_dummies = pd.get_dummies(df_combined['asset'], prefix='asset', dtype=float)

# Drop any one-hot columns left over from a previous run of this cell before
# attaching the fresh ones, so re-running does not duplicate feature columns.
df_combined = pd.concat(
    [df_combined.drop(columns=asset_dummies.columns, errors='ignore'), asset_dummies],
    axis=1,
)

# Update feature list to include asset embeddings
asset_embedding_cols = asset_dummies.columns.tolist()
print(f"\nAsset embeddings: {asset_embedding_cols}")

# Final feature list: market features first, then the one-hot asset columns
ALL_FEATURES = available_features + asset_embedding_cols
print(f"\nTotal features (including asset embeddings): {len(ALL_FEATURES)}")

## 4. Create Sliding Windows Per Asset

Process each asset separately so that no sliding window spans the boundary between two assets (which would mix unrelated price series in a single sample)

In [None]:
def create_sliding_windows(df, feature_cols, target_col, window_size=60, asset_name=None):
    """
    Create sliding windows for time series data.

    Rows containing NaN or +/-inf in any feature or in the target are dropped
    first; windows are then built over the remaining rows in their existing
    order. Window ``i`` holds rows ``i .. i + window_size - 1`` and is paired
    with the target of row ``i + window_size`` (the next timestep).

    Args:
        df: DataFrame with features and target
        feature_cols: Sequence of feature column names
        target_col: Target column name
        window_size: Number of timesteps to look back (must be >= 1)
        asset_name: Optional asset name for logging

    Returns:
        X: float64 array of shape (n_samples, window_size, n_features)
        y: Array of shape (n_samples,)
        Both are ``None`` when fewer than ``window_size + 1`` clean rows remain.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    feature_cols = list(feature_cols)

    # Drop NaN and inf
    df_clean = (
        df[feature_cols + [target_col]]
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )

    if len(df_clean) < window_size + 1:
        if asset_name:
            print(f"  {asset_name}: Not enough data ({len(df_clean)} samples)")
        return None, None

    # Convert once to a homogeneous float matrix; this also normalises bool or
    # integer feature columns instead of yielding an object array.
    features = df_clean[feature_cols].to_numpy(dtype=np.float64)
    targets = df_clean[target_col].to_numpy()

    # sliding_window_view gives (n_rows - window_size + 1, n_features,
    # window_size). The final window has no "next" row to predict, so it is
    # dropped; the axes are then swapped to (sample, timestep, feature) and
    # the view is materialised as an independent contiguous array.
    windows = np.lib.stride_tricks.sliding_window_view(features, window_size, axis=0)
    X = np.ascontiguousarray(windows[:-1].transpose(0, 2, 1))

    # Target at next timestep
    y = np.array(targets[window_size:])

    if asset_name:
        print(f"  {asset_name}: {len(X)} windows created")

    return X, y

# Build the windowed samples one asset at a time
WINDOW_SIZE = 60
print(f"Creating sliding windows (window_size={WINDOW_SIZE})...\n")

X_by_asset = {}
y_by_asset = {}

# Only assets whose CSV was actually loaded are processed, in ASSETS order
for asset in (name for name in ASSETS if name in asset_data):
    # Rows of this asset, including its one-hot embedding columns
    asset_df = df_combined[df_combined['asset'] == asset].copy()

    windows, targets = create_sliding_windows(
        asset_df,
        ALL_FEATURES,
        TARGET_COL,
        window_size=WINDOW_SIZE,
        asset_name=asset,
    )

    # The builder returns None when the asset has too few clean rows
    if windows is None:
        continue

    X_by_asset[asset] = windows
    y_by_asset[asset] = targets

print(f"\nTotal assets processed: {len(X_by_asset)}")

## 5. Train/Val/Test Split

**Strategy**: 
- Train on first 70% of each asset
- Val on next 15%
- Test on last 15%
- Combine all assets for training

In [None]:
def split_data(X, y, train_ratio=0.7, val_ratio=0.15, gap=0):
    """Split samples chronologically into train / validation / test parts.

    The inputs are sliced in their existing order (no shuffling): the first
    ``train_ratio`` share is the training set, the next ``val_ratio`` share is
    validation, and whatever remains is the test set.

    Consecutive sliding windows overlap, so the windows on either side of a
    split boundary share most of their timesteps. ``gap`` optionally purges
    that overlap by discarding the first ``gap`` samples of the validation and
    of the test part. The default of 0 keeps the original contiguous split.

    Args:
        X: Sequence/array of samples, sliceable along the first axis.
        y: Targets aligned with ``X`` (same length).
        train_ratio: Fraction of samples used for training.
        val_ratio: Fraction of samples used for validation.
        gap: Number of samples to drop after each split boundary.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, a ratio is negative,
            the ratios sum to more than 1, or ``gap`` is negative.
    """
    n = len(X)
    if len(y) != n:
        raise ValueError(f"X and y must have the same length, got {n} and {len(y)}")
    if gap < 0:
        raise ValueError(f"gap must be >= 0, got {gap}")
    # Small tolerance so ratios that sum to 1 up to float rounding are accepted
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError("train_ratio and val_ratio must be non-negative and sum to at most 1")

    train_end = int(n * train_ratio)
    val_end = int(n * (train_ratio + val_ratio))

    # Clamp so that an oversized gap yields empty parts instead of
    # overlapping or inverted slices.
    val_start = min(train_end + gap, val_end)
    test_start = min(val_end + gap, n)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[val_start:val_end], y[val_start:val_end]
    X_test, y_test = X[test_start:], y[test_start:]

    return X_train, X_val, X_test, y_train, y_val, y_test

# Chronological split per asset, then pool the parts across assets
SPLIT_KEYS = ('X_train', 'X_val', 'X_test', 'y_train', 'y_val', 'y_test')

splits_by_asset = {}
print("Splitting data by asset...\n")

for asset, asset_windows in X_by_asset.items():
    # split_data returns the six parts in exactly the SPLIT_KEYS order
    parts = dict(zip(SPLIT_KEYS, split_data(asset_windows, y_by_asset[asset])))
    splits_by_asset[asset] = parts

    print(f"{asset}:")
    print(f"  Train: {len(parts['X_train'])} samples")
    print(f"  Val: {len(parts['X_val'])} samples")
    print(f"  Test: {len(parts['X_test'])} samples")


def _stack_across_assets(key):
    """Concatenate one named split part of every asset along the sample axis."""
    return np.concatenate([parts[key] for parts in splits_by_asset.values()], axis=0)


# Combine all training / validation / test data
X_train_combined = _stack_across_assets('X_train')
y_train_combined = _stack_across_assets('y_train')
X_val_combined = _stack_across_assets('X_val')
y_val_combined = _stack_across_assets('y_val')
X_test_combined = _stack_across_assets('X_test')
y_test_combined = _stack_across_assets('y_test')

print(f"\nCombined dataset:")
print(f"  Train: {len(X_train_combined)} samples")
print(f"  Val: {len(X_val_combined)} samples")
print(f"  Test: {len(X_test_combined)} samples")
print(f"  Features: {X_train_combined.shape[2]}")
print(f"  Window size: {X_train_combined.shape[1]}")

## 6. Feature Scaling

In [None]:
# StandardScaler works on 2-D input, so the (sample, timestep) axes are
# flattened into rows while the feature axis is kept as columns.
n_features = X_train_combined.shape[2]

# Fit on training data only so validation/test statistics never leak in
scaler = StandardScaler()
scaler.fit(X_train_combined.reshape(-1, n_features))


def scale_data(X):
    """Standardise a (samples, timesteps, features) array with the fitted scaler.

    The array is flattened to 2-D for the transform and restored to its
    original 3-D shape afterwards.
    """
    original_shape = X.shape
    flat = X.reshape(-1, original_shape[2])
    return scaler.transform(flat).reshape(original_shape)


# Scale all datasets
X_train_scaled, X_val_scaled, X_test_scaled = (
    scale_data(part) for part in (X_train_combined, X_val_combined, X_test_combined)
)

print("Feature scaling complete.")
print(f"  Train data shape: {X_train_scaled.shape}")
print(f"  Train data range: [{X_train_scaled.min():.2f}, {X_train_scaled.max():.2f}]")

## 7. Prepare Dual Targets

In [None]:
# Classification head target: 1.0 where |heatmap| exceeds the confidence
# threshold, 0.0 otherwise.
CONFIDENCE_THRESHOLD = 0.3


def _confident_flags(values):
    """Return float32 0/1 flags marking |value| > CONFIDENCE_THRESHOLD."""
    return (np.abs(values) > CONFIDENCE_THRESHOLD).astype(np.float32)


y_train_class = _confident_flags(y_train_combined)
y_val_class = _confident_flags(y_val_combined)
y_test_class = _confident_flags(y_test_combined)

# Regression head target: the signed heatmap value itself, as float32
y_train_reg = y_train_combined.astype(np.float32)
y_val_reg = y_val_combined.astype(np.float32)
y_test_reg = y_test_combined.astype(np.float32)

# Summary of the training targets; "strong" uses a separate 0.5 cut-off
strong_train = np.abs(y_train_reg) > 0.5
print(f"\nDual targets prepared:")
print(f"  Classification (good bets): {y_train_class.mean():.2%}")
print(f"  Regression mean: {y_train_reg.mean():.4f}, std: {y_train_reg.std():.4f}")
print(f"  Strong signals (|y| > 0.5): {np.sum(strong_train)} / {len(y_train_reg)} ({np.mean(strong_train):.2%})")

## 8. Define Model (Phase 5 Architecture with Increased Capacity)

In [None]:
# Custom loss from Phase 4/5
class ConsistentPeakLoss(tf.keras.losses.Loss):
    """Regression loss that adds a penalty for under-predicted strong signals.

    The loss is ``base_loss + peak_weight * peak_loss`` where:

    * ``base_loss`` is the mean squared error when ``base_loss == 'mse'``;
      any other value selects a Huber-style loss with ``delta = 1.0``.
    * ``peak_loss`` is computed only over samples whose true magnitude
      exceeds ``peak_threshold`` AND whose prediction, measured in the
      direction of the true sign, is still below ``target_height``. For those
      samples it is the mean squared distance between the prediction and
      ``sign(y_true) * target_height``. It is 0 when no sample qualifies.

    Args:
        target_height: Magnitude predictions are pulled towards on strong signals.
        peak_threshold: ``|y_true|`` above which a sample counts as a strong signal.
        peak_weight: Weight of the peak penalty relative to the base loss.
        base_loss: ``'mse'`` for mean squared error, anything else for Huber.
        name: Name of the loss instance.
        **kwargs: Forwarded to ``tf.keras.losses.Loss`` (e.g. ``reduction``).
            Accepting them is required for ``from_config`` to work, because
            the base ``get_config()`` emits keys this class does not define.
    """

    def __init__(self, target_height=0.7, peak_threshold=0.5, peak_weight=0.5,
                 base_loss='mse', name="consistent_peak_loss", **kwargs):
        super().__init__(name=name, **kwargs)
        self.target_height = target_height
        self.peak_threshold = peak_threshold
        self.peak_weight = peak_weight
        self.base_loss = base_loss

    def call(self, y_true, y_pred):
        """Return the scalar combined loss for one batch."""
        # Work on flat float32 vectors so (batch,) and (batch, 1) inputs
        # cannot broadcast against each other.
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        y_true = tf.reshape(y_true, [-1])
        y_pred = tf.reshape(y_pred, [-1])

        # Base loss
        if self.base_loss == 'mse':
            base_loss = tf.reduce_mean(tf.square(y_true - y_pred))
        else:
            # Huber: quadratic up to delta, linear beyond it
            delta = 1.0
            abs_error = tf.abs(y_true - y_pred)
            quadratic = tf.minimum(abs_error, delta)
            linear = abs_error - quadratic
            base_loss = tf.reduce_mean(0.5 * tf.square(quadratic) + delta * linear)

        # Peak penalty
        strong_signal_mask = tf.cast(tf.abs(y_true) > self.peak_threshold, tf.float32)
        target_values = tf.sign(y_true) * self.target_height * strong_signal_mask
        # Prediction projected onto the true direction: negative when the
        # predicted sign is wrong, small when the prediction is too timid.
        signed_pred_magnitude = y_pred * tf.sign(y_true)
        weak_prediction_mask = tf.cast(signed_pred_magnitude < self.target_height, tf.float32)
        penalty_mask = strong_signal_mask * weak_prediction_mask
        peak_mse = tf.square(y_pred - target_values) * penalty_mask

        # Average over penalised samples only; guard the empty case so the
        # division never produces NaN.
        num_penalty_points = tf.reduce_sum(penalty_mask)
        peak_loss = tf.cond(
            tf.greater(num_penalty_points, 0),
            lambda: tf.reduce_sum(peak_mse) / num_penalty_points,
            lambda: tf.constant(0.0, dtype=tf.float32)
        )

        return base_loss + self.peak_weight * peak_loss

    def get_config(self):
        """Return the constructor arguments needed to recreate this loss."""
        config = super().get_config()
        config.update({
            'target_height': self.target_height,
            'peak_threshold': self.peak_threshold,
            'peak_weight': self.peak_weight,
            'base_loss': self.base_loss
        })
        return config

def create_dual_branch_losses(classification_weight=0.4, regression_weight=0.6,
                              peak_threshold=0.5, target_height=0.7, peak_weight=0.5):
    """Build the per-output losses and loss weights for the dual-branch model.

    Args:
        classification_weight: Weight of the classification head's loss.
        regression_weight: Weight of the regression head's loss.
        peak_threshold: Passed to ``ConsistentPeakLoss``.
        target_height: Passed to ``ConsistentPeakLoss``.
        peak_weight: Passed to ``ConsistentPeakLoss``.

    Returns:
        ``(loss_dict, loss_weights)``, both keyed by the model's output names
        ``'classification_output'`` and ``'regression_output'``.
    """
    # Regression head: MSE plus the strong-signal peak penalty
    peak_aware_regression = ConsistentPeakLoss(
        base_loss='mse',
        peak_weight=peak_weight,
        peak_threshold=peak_threshold,
        target_height=target_height,
    )

    losses = {
        'classification_output': 'binary_crossentropy',
        'regression_output': peak_aware_regression,
    }
    weights = {
        'classification_output': classification_weight,
        'regression_output': regression_weight,
    }
    return losses, weights


print("Custom losses defined.")

In [None]:
def create_multi_asset_model(
    input_shape: Tuple[int, int],
    cnn_filters: List[int] = [32, 64, 128],  # Increased from Phase 5
    lstm_units: int = 64,  # Increased from Phase 5
    dense_units: List[int] = [64, 32],  # Increased from Phase 5
    dropout_rate: float = 0.3,  # Reduced dropout (more data = less overfitting risk)
    l2_reg: float = 0.005,  # Reduced L2 (more data)
    name: str = "multi_asset_dual_branch"
) -> Model:
    """
    Build the dual-branch (classification + regression) multi-asset network.

    A shared stack of Conv1D blocks feeds two heads:
    - classification: global max pooling -> dense stack -> sigmoid unit
    - regression: bidirectional LSTM -> dense stack -> tanh unit

    Capacity relative to Phase 5 (as stated by the original author):
    - Conv filters 16→32→64 become 32→64→128
    - LSTM 32 → 64 units
    - Dense [32,16] → [64,32]
    - Lighter regularization (dropout 0.4 → 0.3, L2 0.01 → 0.005)

    Args:
        input_shape: ``(window_size, n_features)`` of one sample.
        cnn_filters: Filter count of each Conv1D block, in order.
        lstm_units: Units per direction of the bidirectional LSTM.
        dense_units: Width of each dense layer, used by both heads.
        dropout_rate: Dropout rate after every block; the LSTM uses it for
            its inputs and half of it for its recurrent state.
        l2_reg: L2 factor for conv, dense and LSTM kernels.
        name: Name given to the Keras model.

    Returns:
        Uncompiled Keras ``Model`` with outputs
        ``[classification_output, regression_output]``.
    """
    def _norm_relu_dropout(tensor, tag):
        # BatchNorm -> ReLU -> Dropout, with layers named after `tag`
        tensor = layers.BatchNormalization(name=f'bn_{tag}')(tensor)
        tensor = layers.Activation('relu', name=f'relu_{tag}')(tensor)
        return layers.Dropout(dropout_rate, name=f'dropout_{tag}')(tensor)

    def _dense_stack(tensor, branch):
        # One linear Dense layer + norm/activation/dropout per entry of dense_units
        for idx, units in enumerate(dense_units, start=1):
            tensor = layers.Dense(
                units,
                activation=None,
                kernel_regularizer=regularizers.l2(l2_reg),
                name=f'{branch}_dense_{idx}'
            )(tensor)
            tensor = _norm_relu_dropout(tensor, f'{branch}_{idx}')
        return tensor

    inputs = layers.Input(shape=input_shape, name='input')

    # --- Shared CNN feature extractor ('same' padding keeps the time axis) ---
    shared_features = inputs
    for idx, n_filters in enumerate(cnn_filters, start=1):
        shared_features = layers.Conv1D(
            filters=n_filters,
            kernel_size=3,
            padding='same',
            activation=None,
            kernel_regularizer=regularizers.l2(l2_reg),
            name=f'conv1d_{idx}'
        )(shared_features)
        shared_features = _norm_relu_dropout(shared_features, f'conv_{idx}')

    # --- Classification head ---
    pooled = layers.GlobalMaxPooling1D(name='class_pool')(shared_features)
    class_branch = _dense_stack(pooled, 'class')
    classification_output = layers.Dense(
        1, activation='sigmoid', name='classification_output'
    )(class_branch)

    # --- Regression head ---
    recurrent_cell = layers.LSTM(
        lstm_units,
        return_sequences=False,
        kernel_regularizer=regularizers.l2(l2_reg),
        recurrent_regularizer=regularizers.l2(l2_reg),
        dropout=dropout_rate,
        recurrent_dropout=dropout_rate * 0.5
    )
    sequence_summary = layers.Bidirectional(recurrent_cell, name='bilstm')(shared_features)
    reg_branch = _dense_stack(sequence_summary, 'reg')
    regression_output = layers.Dense(
        1, activation='tanh', name='regression_output'
    )(reg_branch)

    return Model(
        inputs=inputs,
        outputs=[classification_output, regression_output],
        name=name
    )


print("Multi-asset model architecture defined.")

## 9. Training Configuration

In [None]:
# Model configuration
MODEL_CONFIG = {
    'cnn_filters': [32, 64, 128],
    'lstm_units': 64,
    'dense_units': [64, 32],
    'dropout_rate': 0.3,
    'l2_reg': 0.005
}

# Loss configuration
LOSS_CONFIG = {
    'classification_weight': 0.4,
    'regression_weight': 0.6,
    'peak_threshold': 0.5,
    'target_height': 0.7,
    'peak_weight': 0.5
}

# Training configuration
TRAINING_CONFIG = {
    'epochs': 50,
    'batch_size': 128,  # Larger batch for more data
    'learning_rate': 0.001,
    'patience': 10  # More patience with more data
}

print("Training configuration:")
print(f"  Model: {MODEL_CONFIG}")
print(f"  Loss: {LOSS_CONFIG}")
print(f"  Training: {TRAINING_CONFIG}")

## 10. Create and Compile Model

In [None]:
# Re-seed right before construction so weight initialisation is repeatable
set_seed(42)

# One sample is a (window_size, n_features) matrix
window_len, feature_count = X_train_scaled.shape[1], X_train_scaled.shape[2]
model = create_multi_asset_model(
    input_shape=(window_len, feature_count),
    **MODEL_CONFIG
)

# Per-head losses and weights, keyed by output layer name
loss_dict, loss_weights = create_dual_branch_losses(**LOSS_CONFIG)

optimizer = Adam(learning_rate=TRAINING_CONFIG['learning_rate'])
head_metrics = {
    'classification_output': ['accuracy', tf.keras.metrics.AUC(name='auc')],
    'regression_output': ['mae', 'mse'],
}

model.compile(
    optimizer=optimizer,
    loss=loss_dict,
    loss_weights=loss_weights,
    metrics=head_metrics,
)

# Print layer-by-layer overview
model.summary()

## 11. Setup Callbacks

In [None]:
# Output locations inside the Phase 6 model directory
checkpoint_path = PHASE6_MODEL_DIR / 'best_model_multi_asset.keras'
log_dir = PHASE6_MODEL_DIR / 'logs'
log_dir.mkdir(exist_ok=True)

# Keep the weights of the epoch with the lowest validation loss on disk
best_checkpoint = callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    mode='min',
    verbose=1,
)

# Stop once val_loss stalls for `patience` epochs and roll back to the best weights
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=TRAINING_CONFIG['patience'],
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 5 stagnant epochs, never below 1e-6
lr_on_plateau = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

# Per-epoch metrics as CSV
csv_history = callbacks.CSVLogger(PHASE6_MODEL_DIR / 'training_history.csv')

callbacks_list = [best_checkpoint, early_stopping, lr_on_plateau, csv_history]

print("Callbacks configured.")

## 12. Train Model

In [None]:
print("Starting training...\n")

# Targets are keyed by output layer name, one entry per head
train_targets = {
    'classification_output': y_train_class,
    'regression_output': y_train_reg,
}
val_targets = {
    'classification_output': y_val_class,
    'regression_output': y_val_reg,
}

history = model.fit(
    X_train_scaled,
    train_targets,
    validation_data=(X_val_scaled, val_targets),
    batch_size=TRAINING_CONFIG['batch_size'],
    epochs=TRAINING_CONFIG['epochs'],
    callbacks=callbacks_list,
    verbose=1,
)

# Summarise the loss curves. The "gap" compares the LAST epoch's validation
# and training loss, which is not necessarily the epoch whose weights were
# restored by early stopping.
train_curve = history.history['loss']
val_curve = history.history['val_loss']

print("\nTraining complete!")
print(f"  Best val_loss: {min(val_curve):.4f}")
print(f"  Final train_loss: {train_curve[-1]:.4f}")
print(f"  Final val_loss: {val_curve[-1]:.4f}")
print(f"  Overfitting gap: {val_curve[-1] - train_curve[-1]:.4f}")

## 13. Evaluate on Combined Test Set

In [None]:
# The model returns one array per head; flatten each to a 1-D vector
y_pred_class, y_pred_reg = (
    head.flatten() for head in model.predict(X_test_scaled, verbose=0)
)
y_pred_class_binary = (y_pred_class > 0.5).astype(int)

# Intermediate values reused by several metrics below
test_mse = mean_squared_error(y_test_reg, y_pred_reg)
sign_match = np.sign(y_test_reg) == np.sign(y_pred_reg)

classification_metrics = {
    'accuracy': float(accuracy_score(y_test_class, y_pred_class_binary)),
    'precision': float(precision_score(y_test_class, y_pred_class_binary, zero_division=0)),
    'recall': float(recall_score(y_test_class, y_pred_class_binary, zero_division=0)),
    'f1_score': float(f1_score(y_test_class, y_pred_class_binary, zero_division=0)),
}
regression_metrics = {
    'mae': float(mean_absolute_error(y_test_reg, y_pred_reg)),
    'mse': float(test_mse),
    'rmse': float(np.sqrt(test_mse)),
    'r2': float(r2_score(y_test_reg, y_pred_reg)),
    # Share of samples whose predicted sign equals the true sign
    'direction_accuracy': float(np.mean(sign_match)),
}
test_metrics = {
    'classification': classification_metrics,
    'regression': regression_metrics,
}

banner = "=" * 80
print("\n" + banner)
print("PHASE 6 COMBINED TEST METRICS")
print(banner)
print(json.dumps(test_metrics, indent=2))

# Break the regression results down by true signal strength
strong_mask = np.abs(y_test_reg) > 0.5
weak_mask = np.abs(y_test_reg) <= 0.5

print("\n" + banner)
print("SIGNAL STRENGTH ANALYSIS (Combined)")
print(banner)

if strong_mask.any():
    mae_strong = mean_absolute_error(y_test_reg[strong_mask], y_pred_reg[strong_mask])
    dir_acc_strong = np.mean(sign_match[strong_mask])
    avg_conf = np.abs(y_pred_reg[strong_mask]).mean()

    print(f"\nStrong Signals (|y| > 0.5): {strong_mask.sum()} samples")
    print(f"  MAE: {mae_strong:.4f}")
    print(f"  Direction Accuracy: {dir_acc_strong:.2%}")
    print(f"  Avg Prediction Confidence: {avg_conf:.4f}")

if weak_mask.any():
    mae_weak = mean_absolute_error(y_test_reg[weak_mask], y_pred_reg[weak_mask])
    dir_acc_weak = np.mean(sign_match[weak_mask])

    print(f"\nWeak Signals (|y| ≤ 0.5): {weak_mask.sum()} samples")
    print(f"  MAE: {mae_weak:.4f}")
    print(f"  Direction Accuracy: {dir_acc_weak:.2%}")

## 14. Per-Asset Performance Analysis

Test generalization across different assets

In [None]:
per_asset_results = {}

separator = "=" * 80
print("\n" + separator)
print("PER-ASSET PERFORMANCE")
print(separator)

for asset, asset_splits in splits_by_asset.items():
    # The per-asset splits hold UNSCALED windows, so they are passed through
    # the same fitted scaler before prediction.
    X_test_asset = scale_data(asset_splits['X_test'])
    y_test_asset = asset_splits['y_test']

    # Only the regression head is evaluated per asset
    _, y_pred_reg_asset = model.predict(X_test_asset, verbose=0)
    y_pred_reg_asset = y_pred_reg_asset.flatten()

    # Overall metrics for this asset
    sign_hits = np.sign(y_test_asset) == np.sign(y_pred_reg_asset)
    dir_acc = np.mean(sign_hits)
    mae = mean_absolute_error(y_test_asset, y_pred_reg_asset)

    # Same metrics restricted to strong signals; None when there are none
    strong_mask_asset = np.abs(y_test_asset) > 0.5
    strong_count = int(strong_mask_asset.sum())
    dir_acc_strong_asset = None
    mae_strong_asset = None
    if strong_count > 0:
        dir_acc_strong_asset = np.mean(sign_hits[strong_mask_asset])
        mae_strong_asset = mean_absolute_error(
            y_test_asset[strong_mask_asset],
            y_pred_reg_asset[strong_mask_asset],
        )

    per_asset_results[asset] = {
        'test_samples': len(y_test_asset),
        'direction_accuracy': float(dir_acc),
        'mae': float(mae),
        'strong_signals': {
            'count': strong_count,
            'direction_accuracy': None if dir_acc_strong_asset is None else float(dir_acc_strong_asset),
            'mae': None if mae_strong_asset is None else float(mae_strong_asset),
        },
    }

    print(f"\n{asset}:")
    print(f"  Test samples: {len(y_test_asset)}")
    print(f"  Direction Accuracy: {dir_acc:.2%}")
    print(f"  MAE: {mae:.4f}")
    if dir_acc_strong_asset is not None:
        print(f"  Strong Signal Dir Acc: {dir_acc_strong_asset:.2%} ({strong_count} samples)")

print("\n" + separator)

## 15. Save Results

In [None]:
# Save model
model.save(PHASE6_MODEL_DIR / 'final_model_multi_asset.keras')
print(f"Model saved to {PHASE6_MODEL_DIR / 'final_model_multi_asset.keras'}")

# Save predictions (combined test set, one file per output head)
np.save(PHASE6_MODEL_DIR / 'y_test_class_pred.npy', y_pred_class)
np.save(PHASE6_MODEL_DIR / 'y_test_reg_pred.npy', y_pred_reg)
print("Predictions saved.")

# Assets that actually contributed data. ASSETS is only the requested list:
# pairs whose file was missing or too short were skipped earlier, so
# reporting ASSETS would overstate what the model was trained on.
trained_assets = list(splits_by_asset.keys())

# The strong/weak metrics only exist when the matching mask is non-empty
has_strong = strong_mask.sum() > 0
has_weak = weak_mask.sum() > 0

# Save results
results = {
    'model_config': MODEL_CONFIG,
    'loss_config': LOSS_CONFIG,
    'training_config': TRAINING_CONFIG,
    'data_info': {
        'assets': trained_assets,
        'total_train_samples': len(X_train_combined),
        'total_val_samples': len(X_val_combined),
        'total_test_samples': len(X_test_combined),
        'window_size': WINDOW_SIZE,
        'n_features': X_train_scaled.shape[2]
    },
    'combined_test_metrics': test_metrics,
    'per_asset_results': per_asset_results,
    'signal_strength_analysis': {
        'strong_signals': {
            'count': int(strong_mask.sum()),
            'mae': float(mae_strong) if has_strong else None,
            'direction_accuracy': float(dir_acc_strong) if has_strong else None
        },
        'weak_signals': {
            'count': int(weak_mask.sum()),
            'mae': float(mae_weak) if has_weak else None,
            'direction_accuracy': float(dir_acc_weak) if has_weak else None
        }
    },
    'training_info': {
        'epochs_trained': len(history.history['loss']),
        'best_val_loss': float(min(history.history['val_loss'])),
        'final_train_loss': float(history.history['loss'][-1]),
        'final_val_loss': float(history.history['val_loss'][-1]),
        # Last-epoch validation loss minus last-epoch training loss
        'overfitting_gap': float(history.history['val_loss'][-1] - history.history['loss'][-1])
    }
}

with open(PHASE6_MODEL_DIR / 'results.json', 'w') as f:
    json.dump(results, f, indent=2)
print(f"Results saved to {PHASE6_MODEL_DIR / 'results.json'}")

print("\n" + "="*80)
print("Phase 6 Complete!")
print("="*80)
print("\nKey Achievements:")
print(f"  ✓ Multi-asset training: {len(trained_assets)} FX pairs")
print(f"  ✓ Total samples: {len(X_train_combined):,} (train)")
print(f"  ✓ Increased model capacity: {MODEL_CONFIG}")
print(f"  ✓ Asset-specific embeddings")
print(f"  ✓ Per-asset performance analysis")
print(f"\n  Overfitting gap: {results['training_info']['overfitting_gap']:.4f}")
print(f"  Strong signal direction accuracy: {dir_acc_strong:.2%}" if has_strong else "  No strong signals in test")
print("\nNext Steps:")
print("  - Compare Phase 5 vs Phase 6 (single vs multi-asset)")
print("  - Backtest with meta-labeling")
print("  - Deploy for live trading")