# In [None]:
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

def evaluate_lstm(model_path, X_test, y_test, coin_column='coin', num_features=None):
    """
    Evaluate a saved LSTM model (with coin-ID input) on windowed test data.

    The rows of ``X_test`` are cut into sliding windows of ``seq_length``
    consecutive rows, and each window is scored against the label of the row
    that immediately follows it. The first ``seq_length`` labels therefore
    have no prediction: every returned array and every metric covers only the
    remaining ``len(X_test) - seq_length`` rows.

    Args:
        model_path: Path to a joblib file containing a dict with the keys
            'model', 'coin_encoder', 'feature_scaler' and, optionally,
            'seq_length' (10 is used when the key is absent).
        X_test: Test features as a DataFrame. Must contain ``coin_column``
            and every column named in ``num_features``. It is not modified;
            a copy is transformed.
        y_test: Test labels, one per row of ``X_test`` (Series, array or list).
        coin_column: Name of the column containing coin IDs.
        num_features: Names of the numeric columns passed to the scaler and
            the model, in order. Defaults to
            ['rsi_14', 'volumeto', 'macd_line'].

    Returns:
        Dictionary with:
            'predictions':   0/1 predictions (probability > 0.5),
            'probabilities': flattened model outputs,
            'y_true':        the labels the predictions are aligned with,
            'metrics':       accuracy, precision, recall, f1 and auc.

    Raises:
        ValueError: If ``y_test`` and ``X_test`` differ in length, or if
            ``X_test`` has too few rows to form a single window.
    """
    if num_features is None:
        num_features = ['rsi_14', 'volumeto', 'macd_line']
    else:
        num_features = list(num_features)

    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load model files from a trusted source.
    model_data = joblib.load(model_path)
    lstm_model = model_data['model']
    coin_encoder = model_data['coin_encoder']  # encoder for coin IDs
    feature_scaler = model_data['feature_scaler']  # scaler for numeric features
    seq_length = model_data.get('seq_length', 10)  # default 10 if not stored

    y_values = np.asarray(y_test)
    if len(y_values) != len(X_test):
        raise ValueError(
            f"X_test has {len(X_test)} rows but y_test has {len(y_values)} labels"
        )
    if len(X_test) <= seq_length:
        raise ValueError(
            f"Need more than seq_length={seq_length} rows to build a sequence, "
            f"got {len(X_test)}"
        )

    # Work on a copy so the caller's frame is left untouched.
    X_test = X_test.copy()

    # 1. Encode coins
    X_test['coin_code'] = coin_encoder.transform(X_test[coin_column])

    # 2. Scale numerical features
    X_test[num_features] = feature_scaler.transform(X_test[num_features])

    # 3. Create sequences. batch_size=len(X_test) guarantees that the single
    #    batch test_gen[0] contains every window.
    # NOTE(review): windows are taken over consecutive rows regardless of coin,
    # so a frame stacking several coins yields windows that straddle two
    # coins - confirm X_test is ordered/split the way the model was trained.
    # NOTE(review): assumes the model accepts one array whose first column is
    # the coin code followed by the numeric features - TODO confirm.
    test_gen = TimeseriesGenerator(
        data=X_test[['coin_code'] + num_features].values,
        targets=y_values,
        length=seq_length,
        batch_size=len(X_test),
    )
    X_seq, y_true = test_gen[0]

    # Make predictions
    y_proba = lstm_model.predict(X_seq).flatten()
    y_pred = (y_proba > 0.5).astype(int)

    # Score against the generator's own targets: they are already aligned with
    # the windows, whereas the full y_test is seq_length entries longer.
    return {
        'predictions': y_pred,
        'probabilities': y_proba,
        'y_true': y_true,
        'metrics': {
            'accuracy': accuracy_score(y_true, y_pred),
            'precision': precision_score(y_true, y_pred),
            'recall': recall_score(y_true, y_pred),
            'f1': f1_score(y_true, y_pred),
            'auc': roc_auc_score(y_true, y_proba),
        },
    }

# Example Usage:
if __name__ == "__main__":
    # Assuming you have (e.g. from an earlier notebook cell):
    # - X_test (DataFrame with 'coin' column and features)
    # - y_test (labels)

    lstm_path = 'models/lstm_model.pkl'
    lstm_results = evaluate_lstm(
        model_path=lstm_path,
        X_test=X_test,
        y_test=y_test
    )

    print("LSTM Performance Metrics:")
    print(pd.DataFrame([lstm_results['metrics']]))

    # To compare with other models:
    model_paths = {
        'Logistic Regression': 'models/lr_model.pkl',
        'Decision Tree': 'models/dt_model.pkl',
        'Random Forest': 'models/rf_model.pkl',
        'XGBoost': 'models/xgb_model.pkl',
        'LSTM': lstm_path
    }

    # Label-based metrics shared by every non-LSTM model.
    label_metrics = {
        'accuracy': accuracy_score,
        'precision': precision_score,
        'recall': recall_score,
        'f1': f1_score
    }

    # NOTE(review): the LSTM produces predictions from sliding windows, while
    # the models below are scored on every row of X_test, so the rows being
    # compared are not necessarily identical - confirm this is acceptable.
    all_results = {}
    for name, path in model_paths.items():
        if name == 'LSTM':
            # Already evaluated above; reuse the metrics instead of loading
            # the model and predicting a second time.
            all_results[name] = lstm_results['metrics']
            continue

        # NOTE(security): joblib.load unpickles the file; load trusted files only.
        model = joblib.load(path)
        # NOTE(review): X_test is passed as-is, including the raw coin column;
        # presumably each saved model handles its own preprocessing - verify.
        y_pred = model.predict(X_test)

        metrics = {
            metric_name: metric_fn(y_test, y_pred)
            for metric_name, metric_fn in label_metrics.items()
        }
        # AUC needs probabilities; only add it when the model provides them.
        if hasattr(model, 'predict_proba'):
            y_proba = model.predict_proba(X_test)[:, 1]
            metrics['auc'] = roc_auc_score(y_test, y_proba)

        all_results[name] = metrics

    # Create comparison table (one row per model, one column per metric)
    results_df = pd.DataFrame(all_results).T
    print("\nModel Comparison:")
    print(results_df.sort_values('auc', ascending=False))