# Model Prototyping for FLIP

Test and compare different ML models:
- XGBoost classifier
- Neural network (TensorFlow/Keras)
- Ensemble methods
- Model performance metrics

In [1]:
import sys
import os
from pathlib import Path

# Add paths
def find_project_root(start, markers=(('data-pipeline', 'collector'), ('ml', 'training'))):
    """Return the nearest directory at or above *start* that holds a marker.

    Each marker is a tuple of path components relative to a candidate root.
    The search begins at *start* itself and walks up through its parents, so
    it works no matter how deep the notebook sits inside the repository and
    does not depend on any directory being named ``research``.

    Args:
        start: Directory to start searching from.
        markers: Relative sub-paths whose presence identifies the root.

    Returns:
        The first matching directory as a ``Path``; *start* itself when no
        ancestor contains any marker.
    """
    start = Path(start)
    for candidate in (start, *start.parents):
        if any(candidate.joinpath(*marker).is_dir() for marker in markers):
            return candidate
    return start


notebook_dir = Path.cwd()
project_root = find_project_root(notebook_dir)

collector_path = project_root / 'data-pipeline' / 'collector'
training_path = project_root / 'ml' / 'training'
for extra_path in (collector_path, training_path):
    # Skip directories that do not exist, and do not stack duplicate entries
    # on sys.path when this cell is executed more than once.
    if extra_path.exists() and str(extra_path) not in sys.path:
        sys.path.insert(0, str(extra_path))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Optional ML backends: each import is attempted on its own and the outcome is
# recorded in a module-level flag, so later cells can skip a model instead of
# failing with a NameError.
try:
    import xgboost as xgb
    XGBOOST_AVAILABLE = True
except ImportError:
    XGBOOST_AVAILABLE = False
    print("⚠️ XGBoost not available")

try:
    import tensorflow as tf
    TENSORFLOW_AVAILABLE = True
except ImportError:
    TENSORFLOW_AVAILABLE = False
    print("⚠️ TensorFlow not available")

print("✅ Libraries imported successfully")

⚠️ XGBoost not available
⚠️ TensorFlow not available
✅ Libraries imported successfully


## 1. Generate Synthetic Training Data

For prototyping, we'll generate synthetic data that mimics real redemption patterns.


In [None]:
# Generate synthetic training dataset
np.random.seed(42)
n_samples = 10000

# Draw the calendar fields once per sample so that the derived features agree
# with each other: hour_sin/hour_cos must encode the SAME hour (otherwise the
# pair does not lie on the unit circle and carries no time-of-day meaning), and
# is_weekend must follow from day_of_week rather than being drawn independently.
hour_of_day = np.random.randint(0, 24, n_samples)
day_of_week = np.random.randint(0, 7, n_samples)
hour_angle = 2 * np.pi * hour_of_day / 24

# Generate features
X = pd.DataFrame({
    'volatility_1h': np.random.gamma(2, 0.01, n_samples),
    'volatility_24h': np.random.gamma(2, 0.01, n_samples),
    'redemption_success_rate': np.random.beta(95, 5, n_samples),
    'fdc_latency_mean': np.random.normal(240, 60, n_samples),
    'fdc_latency_p95': np.random.normal(300, 80, n_samples),
    'fdc_latency_p99': np.random.normal(360, 100, n_samples),
    'hour_sin': np.sin(hour_angle),
    'hour_cos': np.cos(hour_angle),
    'day_of_week': day_of_week,
    # NOTE(review): assumes 0 = Monday, so 5 and 6 are the weekend - confirm
    # against the convention used by the real feature pipeline.
    'is_weekend': (day_of_week >= 5).astype(int),
    'agent_success_rate': np.random.beta(96, 4, n_samples),
    'redemption_amount': np.random.lognormal(10, 1, n_samples),
    'gas_utilization': np.random.beta(5, 5, n_samples),
})

# Generate labels with realistic success probability
# Higher success rate when:
# - Low volatility
# - High recent success rate
# - Low FDC latency
# - Good agent performance
success_prob = (
    0.95 +
    0.02 * (X['redemption_success_rate'] - 0.95) +
    0.01 * (1 - X['volatility_24h'] / 0.1) +
    0.01 * (1 - X['fdc_latency_mean'] / 300) +
    0.01 * (X['agent_success_rate'] - 0.95) +
    np.random.normal(0, 0.01, n_samples)
)
# The additive terms and the noise can push the value outside [0, 1].
success_prob = np.clip(success_prob, 0, 1)
# Bernoulli draw per sample: 1 = success, 0 = failure.
y = (np.random.random(n_samples) < success_prob).astype(int)

# Split data (stratified so train and test keep the same success/failure ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("📊 Dataset Statistics:")
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")
print(f"Features: {len(X.columns)}")
print(f"Success rate (train): {y_train.mean():.2%}")
print(f"Success rate (test): {y_test.mean():.2%}")
print(f"\nFeature names: {list(X.columns)}")


## 2. XGBoost Model


In [None]:
# Train and evaluate the XGBoost classifier. When the library is missing the
# cell still defines xgb_metrics / xgb_model (empty / None) so that the later
# ensemble and comparison cells can run.
if XGBOOST_AVAILABLE:
    print("Training XGBoost model...")

    # Train XGBoost
    xgb_model = xgb.XGBClassifier(
        max_depth=6,
        learning_rate=0.1,
        n_estimators=100,
        objective='binary:logistic',
        eval_metric='logloss',
        random_state=42,
        n_jobs=-1
    )

    # The eval_set is only monitored (no early stopping is configured), so the
    # held-out data does not influence the fitted trees.
    xgb_model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        verbose=False
    )

    # Predictions: hard labels plus the probability of the positive class
    y_pred_xgb = xgb_model.predict(X_test)
    y_pred_proba_xgb = xgb_model.predict_proba(X_test)[:, 1]

    # Metrics
    xgb_metrics = {
        'accuracy': accuracy_score(y_test, y_pred_xgb),
        'precision': precision_score(y_test, y_pred_xgb, zero_division=0),
        'recall': recall_score(y_test, y_pred_xgb, zero_division=0),
        'f1': f1_score(y_test, y_pred_xgb, zero_division=0),
        # ROC AUC is undefined when the test labels contain a single class.
        'roc_auc': roc_auc_score(y_test, y_pred_proba_xgb) if len(np.unique(y_test)) > 1 else 0,
    }

    print("\n📊 XGBoost Metrics:")
    for metric, value in xgb_metrics.items():
        print(f"{metric.capitalize()}: {value:.4f}")

    # Feature importance, labelled with the columns the model was fitted on
    feature_importance = pd.DataFrame({
        'feature': X_train.columns,
        'importance': xgb_model.feature_importances_
    }).sort_values('importance', ascending=False)

    plt.figure(figsize=(10, 6))
    plt.barh(feature_importance['feature'], feature_importance['importance'])
    plt.xlabel('Importance')
    plt.title('XGBoost Feature Importance', fontsize=14, fontweight='bold')
    # Most important feature at the top of the chart.
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()

    print("\n🔝 Top 5 Features:")
    print(feature_importance.head(5))
else:
    print("⚠️ XGBoost not available - skipping")
    xgb_metrics = {}
    xgb_model = None


## 3. Neural Network Model


In [None]:
# Train and evaluate a small feed-forward network. When TensorFlow is missing
# the cell still defines nn_metrics / nn_model (empty / None) so that the later
# ensemble and comparison cells can run.
if TENSORFLOW_AVAILABLE:
    print("Training Neural Network model...")

    # Seed TensorFlow's global RNG for repeatable runs, in line with the
    # random_state=42 used for the data split.
    tf.random.set_seed(42)

    # The raw features live on very different scales (amounts in the tens of
    # thousands, latencies in the hundreds, rates in [0, 1]). Standardising
    # them inside the model keeps training well-conditioned while callers can
    # still pass raw features to nn_model.predict(). Statistics come from the
    # training split only.
    # NOTE: tf.keras.layers.Normalization requires TensorFlow >= 2.6.
    feature_normalizer = tf.keras.layers.Normalization(axis=-1)
    feature_normalizer.adapt(X_train.to_numpy(dtype='float32'))

    # Build neural network
    nn_model = tf.keras.Sequential([
        tf.keras.Input(shape=(X_train.shape[1],)),
        feature_normalizer,
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    nn_model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Train; the held-out split is only monitored per epoch (no early stopping
    # or checkpoint selection depends on it).
    history = nn_model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_test, y_test),
        verbose=0
    )

    # Predictions: run inference once and threshold the probabilities at 0.5
    y_pred_proba_nn = nn_model.predict(X_test, verbose=0).flatten()
    y_pred_nn = (y_pred_proba_nn > 0.5).astype(int)

    # Metrics
    nn_metrics = {
        'accuracy': accuracy_score(y_test, y_pred_nn),
        'precision': precision_score(y_test, y_pred_nn, zero_division=0),
        'recall': recall_score(y_test, y_pred_nn, zero_division=0),
        'f1': f1_score(y_test, y_pred_nn, zero_division=0),
        # ROC AUC is undefined when the test labels contain a single class.
        'roc_auc': roc_auc_score(y_test, y_pred_proba_nn) if len(np.unique(y_test)) > 1 else 0,
    }

    print("\n📊 Neural Network Metrics:")
    for metric, value in nn_metrics.items():
        print(f"{metric.capitalize()}: {value:.4f}")

    # Plot training history
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    axes[0].plot(history.history['loss'], label='Train Loss')
    axes[0].plot(history.history['val_loss'], label='Val Loss')
    axes[0].set_title('Model Loss', fontsize=12, fontweight='bold')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    axes[1].plot(history.history['accuracy'], label='Train Accuracy')
    axes[1].plot(history.history['val_accuracy'], label='Val Accuracy')
    axes[1].set_title('Model Accuracy', fontsize=12, fontweight='bold')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Accuracy')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print("⚠️ TensorFlow not available - skipping")
    nn_metrics = {}
    nn_model = None


## 4. Ensemble Model


In [None]:
# Ensemble: Average predictions from multiple models
# Requires both fitted models; otherwise ensemble_metrics is left empty so the
# comparison cell simply omits the ensemble.
if XGBOOST_AVAILABLE and TENSORFLOW_AVAILABLE and xgb_model is not None and nn_model is not None:
    print("Creating ensemble model...")

    # Get positive-class probabilities from both models
    xgb_proba = xgb_model.predict_proba(X_test)[:, 1]
    nn_proba = nn_model.predict(X_test, verbose=0).flatten()

    # Average predictions (equal weights), then threshold at 0.5
    ensemble_proba = (xgb_proba + nn_proba) / 2
    ensemble_pred = (ensemble_proba > 0.5).astype(int)

    # Metrics
    ensemble_metrics = {
        'accuracy': accuracy_score(y_test, ensemble_pred),
        'precision': precision_score(y_test, ensemble_pred, zero_division=0),
        'recall': recall_score(y_test, ensemble_pred, zero_division=0),
        'f1': f1_score(y_test, ensemble_pred, zero_division=0),
        # ROC AUC is undefined when the test labels contain a single class.
        'roc_auc': roc_auc_score(y_test, ensemble_proba) if len(np.unique(y_test)) > 1 else 0,
    }

    print("\n📊 Ensemble Metrics:")
    for metric, value in ensemble_metrics.items():
        print(f"{metric.capitalize()}: {value:.4f}")

    # Compare predictions
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Prediction probability distribution, split by the true outcome
    axes[0].hist(ensemble_proba[y_test == 1], bins=30, alpha=0.7, label='Success', color='green')
    axes[0].hist(ensemble_proba[y_test == 0], bins=30, alpha=0.7, label='Failure', color='red')
    axes[0].set_title('Ensemble Prediction Probability Distribution', fontsize=12, fontweight='bold')
    axes[0].set_xlabel('Predicted Probability')
    axes[0].set_ylabel('Frequency')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Confusion matrix. Fixing labels=[0, 1] keeps the matrix 2x2 even when a
    # class is never predicted or absent, and lets the ticks be named.
    class_names = ['Failure', 'Success']
    cm = confusion_matrix(y_test, ensemble_pred, labels=[0, 1])
    sns.heatmap(
        cm, annot=True, fmt='d', cmap='Blues', ax=axes[1],
        xticklabels=class_names, yticklabels=class_names
    )
    axes[1].set_title('Ensemble Confusion Matrix', fontsize=12, fontweight='bold')
    axes[1].set_xlabel('Predicted')
    axes[1].set_ylabel('Actual')

    plt.tight_layout()
    plt.show()
else:
    print("⚠️ Cannot create ensemble - need both XGBoost and Neural Network models")
    ensemble_metrics = {}


## 5. Model Comparison


In [None]:
# Compare all models
# Only models that actually produced metrics are included; an empty dict means
# the corresponding cell was skipped.
all_metrics = {}
if xgb_metrics:
    all_metrics['XGBoost'] = xgb_metrics
if nn_metrics:
    all_metrics['Neural Network'] = nn_metrics
if ensemble_metrics:
    all_metrics['Ensemble'] = ensemble_metrics

if all_metrics:
    # Rows are models, columns are metrics.
    comparison_df = pd.DataFrame(all_metrics).T
    print("\n📊 Model Comparison:")
    print(comparison_df.round(4))

    # Visualization: one panel per metric, one bar per available model
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Use exactly as many colours as there are bars.
    bar_colors = ['steelblue', 'orange', 'green'][:len(comparison_df)]
    metrics_to_plot = ['accuracy', 'precision', 'recall', 'f1']
    for idx, metric in enumerate(metrics_to_plot):
        ax = axes[idx // 2, idx % 2]
        comparison_df[metric].plot(kind='bar', ax=ax, color=bar_colors)
        ax.set_title(f'{metric.capitalize()} Comparison', fontsize=12, fontweight='bold')
        ax.set_ylabel(metric.capitalize())
        ax.set_ylim([0, 1])
        ax.grid(True, alpha=0.3, axis='y')
        ax.tick_params(axis='x', rotation=0)

    plt.tight_layout()
    plt.show()

    # Best model
    best_model = comparison_df['accuracy'].idxmax()
    print(f"\n🏆 Best Model (by accuracy): {best_model}")
    print(f"Accuracy: {comparison_df.loc[best_model, 'accuracy']:.4f}")

    # The synthetic labels are dominated by successes (base probability 0.95),
    # so a model that always predicts success already scores high accuracy.
    # Report the threshold-independent ROC AUC leader as well.
    if 'roc_auc' in comparison_df.columns:
        best_by_auc = comparison_df['roc_auc'].idxmax()
        print(f"Highest ROC AUC: {best_by_auc} ({comparison_df.loc[best_by_auc, 'roc_auc']:.4f})")
else:
    print("⚠️ No models available for comparison")


## 6. Summary

### Key Findings:

1. **XGBoost**: Fast training, good interpretability, feature importance available
2. **Neural Network**: Can capture complex patterns, requires more tuning
3. **Ensemble**: Often provides best performance by combining strengths

### Next Steps:

- Tune hyperparameters for best model
- Validate on real historical data
- Implement conformal prediction for confidence intervals
- Deploy best model for production use
