# Aerospace Component Failure Prediction Model Evaluation

This notebook evaluates the performance of the trained LSTM model for predicting aerospace component maintenance events. It provides comprehensive metrics, visualizations, and analysis of the model's predictive capabilities.

## Key Features:
- Model performance evaluation with MSE, RMSE, MAE metrics
- Binary classification metrics for maintenance event prediction
- Comprehensive visualization suite including density plots, confusion matrices, and error analysis
- Sample prediction comparisons and training history analysis

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import gaussian_kde
import gc

# Artifact locations: model/data files sit in output_dir, figures go to viz_dir
output_dir = os.path.join('private', 'data', 'training_data')
viz_dir = os.path.join(output_dir, 'visualizations')
os.makedirs(viz_dir, exist_ok=True)

# Restore the trained network. The 'mse' / 'mae' names are mapped explicitly to
# Keras objects (presumably so the identifiers stored in the HDF5 file resolve
# during deserialization -- confirm against the training notebook).
model_path = os.path.join(output_dir, 'time_based_maintenance_model.h5')
model = tf.keras.models.load_model(
    model_path,
    custom_objects={
        'mse': tf.keras.losses.MeanSquaredError(),
        'mae': tf.keras.metrics.MeanAbsoluteError(),
    },
)
print(f"Loaded model from {model_path}")

# Rebuild a History-like mapping ({'history': {column: values}}) from the CSV log.
# NOTE: when the CSV is absent, `history` is simply left undefined.
hist_csv_file = os.path.join(output_dir, 'training_history.csv')
if os.path.exists(hist_csv_file):
    history_df = pd.read_csv(hist_csv_file)
    history = {
        'history': {name: history_df[name].values for name in history_df.columns}
    }
    print(f"Loaded training history from {hist_csv_file}")

# Pull the held-out arrays out of the .npz archive.
# NOTE: when the archive is absent, `test_data` is simply left undefined.
test_data_path = os.path.join(output_dir, 'test_data.npz')
if os.path.exists(test_data_path):
    # NOTE(review): allow_pickle=True can execute arbitrary code while loading;
    # only open archives produced by a trusted training run.
    test_data_loaded = np.load(test_data_path, allow_pickle=True)
    X_test, y_test, y_test_original = (
        test_data_loaded[key] for key in ('X_test', 'y_test', 'y_test_original')
    )
    test_data = (X_test, y_test, y_test_original)
    print(f"Loaded test data from {test_data_path}")

# Define evaluation functions
def evaluate_model(model, test_data, output_dir, batch_size=128, threshold=0.5):
    """Score the model on the test set and print regression/classification metrics.

    Predictions are produced batch by batch, mapped back from the log scale
    with ``expm1`` and clipped at zero. Regression metrics (MSE, RMSE, MAE,
    MAPE) are computed against the original-scale targets. Binary metrics
    treat "actual > 0" as the positive class and "predicted > threshold" as a
    positive prediction.

    Args:
        model: Object exposing ``predict(batch, verbose=0)`` that returns
            log-scale predictions, one value per sample.
        test_data: Tuple ``(X_test, y_test, y_test_original)``; only the first
            and last entries are used here.
        output_dir: Unused; kept for interface compatibility.
        batch_size: Number of samples sent to ``model.predict`` per call.
        threshold: Predicted count above which a sample counts as an event.

    Returns:
        dict with keys ``mse``, ``rmse``, ``mae``, ``precision``, ``recall``,
        ``f1``, ``mape`` (``None`` when no target is positive) and ``y_pred``
        (1-D array of original-scale predictions).

    Raises:
        ValueError: If ``batch_size`` is not positive, the test set is empty,
            or predictions and targets differ in length.
    """
    X_test, _, y_test_original = test_data

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")
    n_samples = len(X_test)
    if n_samples == 0:
        raise ValueError("Cannot evaluate model: X_test is empty.")

    # Flatten targets: an (n, 1) column would otherwise broadcast against the
    # flat prediction vector into an (n, n) matrix and corrupt every metric.
    y_true = np.asarray(y_test_original).reshape(-1)

    # Generate predictions batch by batch
    num_batches = int(np.ceil(n_samples / batch_size))
    batch_outputs = []
    for batch_no, start in enumerate(range(0, n_samples, batch_size), start=1):
        batch_pred = model.predict(X_test[start:start + batch_size], verbose=0)
        batch_outputs.append(np.asarray(batch_pred).reshape(-1))

        # Print progress
        if batch_no % 10 == 0:
            print(f"Processed {batch_no}/{num_batches} batches")

    y_pred_log = np.concatenate(batch_outputs)
    # Inverse log transformation, then make sure predictions are non-negative
    y_pred = np.maximum(0, np.expm1(y_pred_log))

    if y_pred.shape != y_true.shape:
        raise ValueError(
            f"Prediction count ({y_pred.size}) does not match target count ({y_true.size})."
        )

    # Regression metrics on the original scale
    residuals = y_true - y_pred
    mse = np.mean(residuals ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(residuals))

    # MAPE is only defined where the actual value is non-zero
    mape = None
    positive = y_true > 0
    if positive.any():
        mape = np.mean(np.abs(residuals[positive] / y_true[positive])) * 100
        print(f"MAPE (non-zero values): {mape:.2f}%")

    # Binary view: did any maintenance event occur / was one predicted?
    y_test_binary = (y_true > 0).astype(int)
    y_pred_binary = (y_pred > threshold).astype(int)

    true_pos = np.sum((y_test_binary == 1) & (y_pred_binary == 1))
    false_pos = np.sum((y_test_binary == 0) & (y_pred_binary == 1))
    true_neg = np.sum((y_test_binary == 0) & (y_pred_binary == 0))
    false_neg = np.sum((y_test_binary == 1) & (y_pred_binary == 0))

    # Precision, recall and F1, falling back to 0.0 on empty denominators
    precision = true_pos / (true_pos + false_pos) if (true_pos + false_pos) > 0 else 0.0
    recall = true_pos / (true_pos + false_neg) if (true_pos + false_neg) > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

    # Print metrics
    print("\nTest Set Evaluation:")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")

    print("\nBinary maintenance event prediction metrics:")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"True Positives: {true_pos}")
    print(f"False Positives: {false_pos}")
    print(f"True Negatives: {true_neg}")
    print(f"False Negatives: {false_neg}")

    return {
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'mape': mape,
        'y_pred': y_pred  # Returned so callers can reuse predictions for plots
    }

def generate_model_evaluation_plots(test_data, y_pred, output_dir):
    """Draw the error histogram and the binary confusion matrix.

    At most 5000 points are plotted; larger test sets are subsampled without
    replacement after seeding NumPy's global RNG with 42.

    Args:
        test_data: Tuple whose third entry holds the original-scale targets.
        y_pred: Original-scale predictions aligned with those targets.
        output_dir: Accepted but not used; figures are written to the
            module-level ``viz_dir``.

    Returns:
        Tuple ``(plot_y_test, plot_y_pred, errors)`` for the plotted subset,
        where ``errors`` is prediction minus actual.
    """
    _, _, actual_all = test_data
    total = len(actual_all)

    # Cap the number of plotted points
    cap = min(5000, total)
    if total > cap:
        np.random.seed(42)  # For reproducibility
        chosen = np.random.choice(total, cap, replace=False)
        plot_y_test, plot_y_pred = actual_all[chosen], y_pred[chosen]
    else:
        plot_y_test, plot_y_pred = actual_all, y_pred

    errors = plot_y_pred - plot_y_test

    # --- Histogram of prediction errors ---
    plt.figure(figsize=(10, 8))
    plt.hist(errors, bins=30, alpha=0.7, color='blue')
    plt.axvline(x=0, color='r', linestyle='--')
    plt.xlabel('Prediction Error')
    plt.ylabel('Frequency')
    plt.title('Distribution of Prediction Errors')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(os.path.join(viz_dir, 'error_distribution.png'))
    plt.close()

    # --- Confusion matrix for the event / no-event view ---
    actual_event = (plot_y_test > 0).astype(int)
    predicted_event = (plot_y_pred > 0.5).astype(int)
    tn = np.sum((actual_event == 0) & (predicted_event == 0))
    fp = np.sum((actual_event == 0) & (predicted_event == 1))
    fn = np.sum((actual_event == 1) & (predicted_event == 0))
    tp = np.sum((actual_event == 1) & (predicted_event == 1))
    # Rows are actual classes, columns are predicted classes
    cm = np.array([[tn, fp], [fn, tp]], dtype=int)

    class_names = ['No Maintenance', 'Maintenance']
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.tight_layout()
    plt.savefig(os.path.join(viz_dir, 'confusion_matrix.png'))
    plt.close()

    return plot_y_test, plot_y_pred, errors

def generate_density_plots(plot_y_test, plot_y_pred, errors, output_dir):
    """Render three diagnostic scatter plots for actual vs. predicted counts.

    Figures produced: a density-coloured scatter (with a plain-scatter
    fallback if the KDE step fails), a low-count scatter, and an
    error-by-actual scatter with percentile clipping.

    Args:
        plot_y_test: Actual values to plot.
        plot_y_pred: Predicted values aligned with ``plot_y_test``.
        errors: Prediction errors aligned with ``plot_y_test``.
        output_dir: Accepted but not used; figures are written to the
            module-level ``viz_dir``.
    """

    def _label_and_save(x_label, y_label, heading, filename):
        # Common tail of every figure: labels, grid, layout, write, release
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.title(heading)
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(os.path.join(viz_dir, filename))
        plt.close()

    # --- Figure 1: scatter coloured by a Gaussian KDE estimate ---
    plt.figure(figsize=(10, 8))
    try:
        stacked = np.vstack([plot_y_test, plot_y_pred])
        density = gaussian_kde(stacked)(stacked)

        # Draw the densest points last so they stay visible
        order = density.argsort()
        points = plt.scatter(plot_y_test[order], plot_y_pred[order], c=density[order],
                             s=30, cmap='viridis', alpha=0.8)
        plt.colorbar(points, label='Density')
    except Exception as exc:
        print(f"Could not create density scatter plot, falling back to simple scatter: {exc}")
        plt.scatter(plot_y_test, plot_y_pred, alpha=0.5, color='blue')

    upper = max(np.max(plot_y_test), np.max(plot_y_pred))
    plt.plot([0, upper], [0, upper], 'r--', linewidth=2)
    _label_and_save(
        'Actual Maintenance Events',
        'Predicted Maintenance Events',
        'Actual vs. Predicted Maintenance Events (Density)',
        'density_scatter.png',
    )

    # --- Figure 2: low-count region (actual <= 5, predicted <= 10) ---
    plt.figure(figsize=(10, 8))
    in_low_range = (plot_y_test <= 5) & (plot_y_pred <= 10)
    plt.scatter(plot_y_test[in_low_range], plot_y_pred[in_low_range], alpha=0.6, color='green')
    plt.plot([0, 5], [0, 5], 'r--', linewidth=2)
    _label_and_save(
        'Actual Maintenance Events (0-5)',
        'Predicted Maintenance Events (0-10)',
        'Low Maintenance Event Count Prediction',
        'low_maintenance_prediction.png',
    )

    # --- Figure 3: error against actual, both clipped to tame outliers ---
    plt.figure(figsize=(10, 8))
    actual_cap = np.percentile(plot_y_test, 95)
    error_floor, error_cap = np.percentile(errors, 5), np.percentile(errors, 95)
    plt.scatter(np.clip(plot_y_test, 0, actual_cap),
                np.clip(errors, error_floor, error_cap),
                color='purple', alpha=0.4, s=30)
    plt.axhline(y=0, color='r', linestyle='--', linewidth=2)
    _label_and_save(
        'Actual Maintenance Events (clipped to 95th percentile)',
        'Prediction Error (clipped 5-95 percentile)',
        'Error by Actual Maintenance Event Count',
        'error_by_actual.png',
    )

def generate_sample_predictions(model, test_data, output_dir):
    """Print and plot predictions for up to ten randomly chosen test samples.

    Predictions are mapped back from the log scale with ``expm1`` and clipped
    at zero, matching ``evaluate_model``. Targets are flattened so a column
    vector does not broadcast against the predictions.

    Args:
        model: Object exposing ``predict(batch, verbose=0)`` that returns
            log-scale predictions.
        test_data: Tuple ``(X_test, y_test, y_test_original)``; the log-scale
            targets in the middle are not used.
        output_dir: Accepted but not used; the figure is written to the
            module-level ``viz_dir``.

    Returns:
        Tuple ``(y_pred, errors, percent_errors)``. ``y_pred`` keeps the shape
        returned by the model; ``errors`` and ``percent_errors`` are 1-D, and
        the percent error is NaN wherever the actual value is not positive.
    """
    X_all, _, y_original_all = test_data

    # Take a sample for demonstration (uses NumPy's global RNG state)
    sample_size = min(10, len(X_all))
    sample_indices = np.random.choice(len(X_all), sample_size, replace=False)

    X_sample = X_all[sample_indices]
    actual = np.asarray(y_original_all[sample_indices]).reshape(-1)

    # Inverse log transform; clip because a count cannot be negative
    y_pred = np.maximum(0, np.expm1(model.predict(X_sample, verbose=0)))
    predicted = np.asarray(y_pred).reshape(-1)

    # Signed error, plus percent error where the actual count is positive
    errors = predicted - actual
    percent_errors = np.full(errors.shape, np.nan, dtype=errors.dtype)
    has_events = actual > 0
    percent_errors[has_events] = errors[has_events] / actual[has_events] * 100

    # Print results
    print("\nSAMPLE PREDICTIONS:")
    print("   ACTUAL       PREDICTED       ERROR       % ERROR        ")

    for act, pred, err, pct in zip(actual, predicted, errors, percent_errors):
        if not np.isnan(pct):
            print(f" {act:11.1f}   {pred:11.1f}   {err:11.1f}   {pct:14.1f}%  ")
        else:
            print(f" {act:11.1f}   {pred:11.1f}   {err:11.1f}          N/A       ")

    # Grouped bar chart comparing actual vs predicted
    plt.figure(figsize=(12, 8))
    ind = np.arange(sample_size)
    width = 0.35

    plt.bar(ind, actual, width, label='Actual', color='royalblue')
    plt.bar(ind + width, predicted, width, label='Predicted', color='lightcoral')

    plt.ylabel('Maintenance Event Count')
    plt.title('Sample Predictions Comparison')
    plt.xticks(ind + width/2, [f'Sample {i+1}' for i in range(sample_size)])
    plt.legend(loc='best')
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Add value labels above each bar
    for i, v in enumerate(actual):
        plt.text(i, v + 0.1, f'{v:.1f}', color='royalblue', fontweight='bold', ha='center')

    for i, v in enumerate(predicted):
        plt.text(i + width, v + 0.1, f'{v:.1f}', color='lightcoral', fontweight='bold', ha='center')

    plt.tight_layout()
    plt.savefig(os.path.join(viz_dir, 'sample_predictions.png'))
    plt.close()

    return y_pred, errors, percent_errors

def create_actual_vs_predicted_zoomed(plot_y_test, plot_y_pred, output_dir):
    """Plot actual vs. predicted counts for points where both are at most 10.

    The figure layers a scatter, the perfect-prediction diagonal, a linear
    regression line (only when more than five points are in range) and a
    hexbin, then annotates R² and MAE for the in-range points.

    Args:
        plot_y_test: Actual values to plot.
        plot_y_pred: Predicted values aligned with ``plot_y_test``.
        output_dir: Accepted but not used; the figure is written to the
            module-level ``viz_dir``.
    """
    plt.figure(figsize=(12, 10))

    # Keep only the points inside the 0-10 window
    low_mask = (plot_y_test <= 10) & (plot_y_pred <= 10)
    actual_low = plot_y_test[low_mask]
    predicted_low = plot_y_pred[low_mask]

    plt.scatter(actual_low, predicted_low, alpha=0.5, s=30, color='#3366CC')

    # Reference diagonal
    plt.plot([0, 10], [0, 10], 'r--', linewidth=2, label='Perfect Prediction')

    # Least-squares trend line, skipped for very small samples
    if np.count_nonzero(low_mask) > 5:
        from sklearn.linear_model import LinearRegression
        trend = LinearRegression()
        trend.fit(actual_low.reshape(-1, 1), predicted_low)
        endpoints = trend.predict(np.array([[0], [10]]))
        plt.plot([0, 10], endpoints, 'g-', linewidth=2,
                 label=f'Regression Line (slope={trend.coef_[0]:.2f})')
    plt.hexbin(actual_low, predicted_low, gridsize=30, cmap='Blues', alpha=0.4)

    # Summary statistics for the in-range points
    correlation = np.corrcoef(actual_low, predicted_low)[0, 1]
    low_r2 = correlation**2
    low_mae = np.mean(np.abs(actual_low - predicted_low))
    plt.annotate(f'R² (0-10 range) = {low_r2:.3f}\nMAE = {low_mae:.2f}',
                 xy=(0.05, 0.95), xycoords='axes fraction',
                 bbox=dict(boxstyle="round,pad=0.5", fc="white", alpha=0.8))

    # Axis labels, title, grid and legend
    plt.xlabel('Actual Maintenance Events (0-10 range)', fontsize=14)
    plt.ylabel('Predicted Maintenance Events (0-10 range)', fontsize=14)
    plt.title('Actual vs. Predicted Maintenance Events (Zoomed to 0-10 range)', fontsize=16)
    plt.grid(True, alpha=0.3, linestyle='--')
    plt.legend(loc='upper left')

    plt.tight_layout()
    plt.savefig(os.path.join(viz_dir, 'actual_vs_predicted_zoomed.png'))
    plt.close()

def create_improved_density_plot(plot_y_test, plot_y_pred, output_dir):
    """Draw a log-binned hexbin of actual vs. predicted counts in the 0-10 range.

    The regression line is fitted only when more than five points fall in
    range, the same guard ``create_actual_vs_predicted_zoomed`` uses. Without
    it an empty selection makes the fit raise. R², MAE and the in-range share
    fall back to NaN / 0 when there is too little data to compute them.

    Args:
        plot_y_test: Actual values to plot.
        plot_y_pred: Predicted values aligned with ``plot_y_test``.
        output_dir: Accepted but not used; the figure is written to the
            module-level ``viz_dir``.
    """
    plt.figure(figsize=(12, 10))

    # Restrict to the zoomed 0-10 window
    zoom_mask = (plot_y_test <= 10) & (plot_y_pred <= 10)
    actual_zoom = plot_y_test[zoom_mask]
    predicted_zoom = plot_y_pred[zoom_mask]
    n_zoom = int(np.sum(zoom_mask))

    hexbin = plt.hexbin(
        actual_zoom,
        predicted_zoom,
        gridsize=40,
        cmap='YlOrRd',
        mincnt=1,
        bins='log',
        alpha=0.8
    )
    # NOTE(review): with bins='log' newer matplotlib shows raw counts on a log
    # colour scale, so this label may not match the tick values -- confirm.
    plt.colorbar(hexbin, label='Log10(Count)')

    # Add perfect prediction line
    plt.plot([0, 10], [0, 10], 'b--', linewidth=2, label='Perfect Prediction')

    # Add regression line only when there are enough points to fit one
    if n_zoom > 5:
        from sklearn.linear_model import LinearRegression
        trend = LinearRegression()
        trend.fit(actual_zoom.reshape(-1, 1), predicted_zoom)
        pred_line = trend.predict(np.array([[0], [10]]))
        plt.plot([0, 10], pred_line, 'g-', linewidth=2,
                 label=f'Regression Line (slope={trend.coef_[0]:.2f})')

    # Metrics for the zoomed region; correlation needs at least two points
    if n_zoom >= 2:
        r2_zoom = np.corrcoef(actual_zoom, predicted_zoom)[0, 1]**2
    else:
        r2_zoom = float('nan')
    mae_zoom = np.mean(np.abs(actual_zoom - predicted_zoom)) if n_zoom > 0 else float('nan')
    total_points = len(plot_y_test)
    share_zoom = n_zoom / total_points if total_points > 0 else 0.0

    metrics_text = (
        f'R² (0-10 range) = {r2_zoom:.3f}\n'
        f'MAE (0-10 range) = {mae_zoom:.2f}\n'
        f'Data points in 0-10 range: {n_zoom:,} ({share_zoom:.1%})'
    )

    plt.annotate(
        metrics_text,
        xy=(0.05, 0.95),
        xycoords='axes fraction',
        bbox=dict(boxstyle="round,pad=0.5", fc="white", ec="gray", alpha=0.8),
        va='top'
    )

    # Highlight important regions
    plt.axvspan(0, 1, color='lightgreen', alpha=0.2, label='Zero Maintenance Event Region')
    plt.axvspan(1, 5, color='lightyellow', alpha=0.2, label='Common Maintenance Event Range')

    # Set axis limits for zoom
    plt.xlim(-0.5, 10.5)
    plt.ylim(-0.5, 10.5)

    # Add grid, labels and title
    plt.grid(True, alpha=0.3, linestyle='--')
    plt.xlabel('Actual Maintenance Events (0-10 range)', fontsize=14)
    plt.ylabel('Predicted Maintenance Events (0-10 range)', fontsize=14)
    plt.title('Actual vs. Predicted Maintenance Events (0-10 Range)', fontsize=16)

    plt.legend(loc='upper left', framealpha=0.9)

    plt.tight_layout()
    plt.savefig(os.path.join(viz_dir, 'density_scatter_zoomed.png'))
    plt.close()

def create_enhanced_training_history(history, output_dir):
    """Plot loss, MAE and a zoomed loss curve from a training history mapping.

    Each panel is drawn through its own Axes object. ``twinx()`` makes the
    learning-rate axis the current axes, so pyplot-level calls made after it
    would otherwise label the wrong axis and build the legend from the twin
    only.

    Args:
        history: Mapping with a ``'history'`` entry holding per-epoch
            sequences under ``'loss'``, ``'val_loss'``, ``'mae'`` and
            ``'val_mae'``, and optionally ``'lr'`` or ``'learning_rate'``.
        output_dir: Directory that receives ``training_history.png``.

    Raises:
        KeyError: If a required metric is missing from the history.
    """
    # Local imports retained from the original, so the function does not rely
    # on module-level imports.
    import matplotlib.pyplot as plt
    import numpy as np
    import os

    def _mark_best(ax, values):
        # Highlight and annotate the minimum of a validation curve
        best_idx = np.argmin(values)
        best_val = values[best_idx]
        ax.plot(best_idx + 1, best_val, 'go', markersize=8)
        ax.annotate(f'Best: {best_val:.2f}',
                    xy=(best_idx + 1, best_val),
                    xytext=(best_idx + 1 + 2, best_val * 1.2),
                    arrowprops=dict(facecolor='green', shrink=0.05))

    logs = history['history']
    loss_values = logs['loss']
    val_loss_values = logs['val_loss']
    mae_values = logs['mae']
    val_mae_values = logs['val_mae']

    if len(loss_values) == 0:
        # Nothing to plot; argmin and indexing below would fail on empty data
        print("Training history is empty; skipping training history visualization.")
        return

    epochs = range(1, len(loss_values) + 1)

    plt.figure(figsize=(12, 15))

    # Loss curve
    ax_loss = plt.subplot(3, 1, 1)
    ax_loss.plot(epochs, loss_values, 'b-', label='Training Loss')
    ax_loss.plot(epochs, val_loss_values, 'r-', label='Validation Loss')
    _mark_best(ax_loss, val_loss_values)

    # Learning rate on a secondary axis; newer Keras logs it as 'learning_rate'
    lr_key = next((key for key in ('lr', 'learning_rate') if key in logs), None)
    if lr_key is not None:
        ax_lr = ax_loss.twinx()
        ax_lr.plot(epochs, logs[lr_key], 'g--', label='Learning Rate')
        ax_lr.set_ylabel('Learning Rate')
        ax_lr.set_yscale('log')
        # Merge handles from both axes so the legend lists all three curves
        loss_handles, loss_labels = ax_loss.get_legend_handles_labels()
        lr_handles, lr_labels = ax_lr.get_legend_handles_labels()
        ax_lr.legend(loss_handles + lr_handles, loss_labels + lr_labels, loc='upper right')
    else:
        ax_loss.legend(loc='upper right')

    ax_loss.set_title('Training and Validation Loss')
    ax_loss.set_xlabel('Epochs')
    ax_loss.set_ylabel('Loss (MSE)')
    ax_loss.grid(True, linestyle='--', alpha=0.7)

    # MAE curve
    ax_mae = plt.subplot(3, 1, 2)
    ax_mae.plot(epochs, mae_values, 'b-', label='Training MAE')
    ax_mae.plot(epochs, val_mae_values, 'r-', label='Validation MAE')
    _mark_best(ax_mae, val_mae_values)

    ax_mae.set_title('Training and Validation MAE')
    ax_mae.set_xlabel('Epochs')
    ax_mae.set_ylabel('Mean Absolute Error')
    ax_mae.legend(loc='upper right')
    ax_mae.grid(True, linestyle='--', alpha=0.7)

    # Zoomed loss curve (last 75% of training)
    ax_zoom = plt.subplot(3, 1, 3)
    start_idx = len(loss_values) // 4  # Start from 25% of training

    ax_zoom.plot(epochs[start_idx:], loss_values[start_idx:], 'b-', label='Training Loss')
    ax_zoom.plot(epochs[start_idx:], val_loss_values[start_idx:], 'r-', label='Validation Loss')

    # Relative training-loss improvement across the zoomed window
    if loss_values[start_idx] > 0:
        improvement = ((loss_values[start_idx] - loss_values[-1]) / loss_values[start_idx]) * 100
        ax_zoom.annotate(f'Improvement: {improvement:.1f}%',
                         xy=(epochs[-1], loss_values[-1]),
                         xytext=(epochs[-1] - 5, loss_values[-1] * 1.3),
                         arrowprops=dict(facecolor='blue', shrink=0.05))

    ax_zoom.set_title('Training and Validation Loss (Zoomed)')
    ax_zoom.set_xlabel('Epochs')
    ax_zoom.set_ylabel('Loss (MSE)')
    ax_zoom.legend(loc='upper right')
    ax_zoom.grid(True, linestyle='--', alpha=0.7)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'training_history.png'))
    plt.close()

    print(f"Training history visualization saved to {output_dir}")
    return
# Evaluation driver: score the model, write metrics and figures, print a summary.
# Any failure is reported with a traceback instead of propagating.
try:
    if 'test_data' not in locals():
        if os.path.exists(test_data_path):
            # NOTE(review): allow_pickle=True can execute arbitrary code while
            # loading; only open archives produced by a trusted training run.
            test_data_loaded = np.load(test_data_path, allow_pickle=True)
            X_test = test_data_loaded['X_test']
            y_test = test_data_loaded['y_test'] 
            y_test_original = test_data_loaded['y_test_original']
            test_data = (X_test, y_test, y_test_original)
        else:
            raise ValueError("Test data not available. Please save it during training.")
    
    # Evaluate model
    print("\nEvaluating model...")
    metrics = evaluate_model(model, test_data, output_dir)
    
    # Save scalar metrics (the prediction array is excluded from the CSV)
    metrics_df = pd.DataFrame({k: [v] for k, v in metrics.items() if k != 'y_pred'})
    metrics_df.to_csv(os.path.join(output_dir, 'model_metrics.csv'), index=False)
    
    # Create visualizations
    print("\nCreating visualizations...")
    y_pred = metrics.pop('y_pred')  # Get predictions from metrics
    plot_y_test, plot_y_pred, errors = generate_model_evaluation_plots(test_data, y_pred, output_dir)
    generate_density_plots(plot_y_test, plot_y_pred, errors, output_dir)
    create_actual_vs_predicted_zoomed(plot_y_test, plot_y_pred, output_dir)
    # The plotting helpers above and below save into the module-level viz_dir
    # regardless of the directory argument they receive.
    create_improved_density_plot(plot_y_test, plot_y_pred, viz_dir)
    
    # Generate sample predictions
    print("\nGenerating sample predictions...")
    generate_sample_predictions(model, test_data, output_dir)

    # Training history is optional: `history` only exists when the CSV log was
    # found at load time, so skip the plot rather than abort the summary.
    if 'history' in locals():
        create_enhanced_training_history(history, viz_dir)
    else:
        print("\nTraining history not available; skipping training history visualization.")
    
    # Print summary
    batch_size = 128  # Mirrors the prediction batch size used by evaluate_model
    print("\n")
    print("MODEL EVALUATION SUMMARY")
    print(f"GPU Acceleration: {'Enabled' if len(tf.config.list_physical_devices('GPU')) > 0 else 'Disabled'}")
    print(f"Precision: {tf.keras.mixed_precision.global_policy().name}")
    print(f"Batch Size for Prediction: {batch_size}")
    print(f"Target Transformation: Log(x+1) → Inverse: exp(x)-1")
    print(f"Visualization Directory: {viz_dir}")
    
    print("\nTest Metrics:")
    print(f"MSE: {metrics['mse']:.4f}")
    print(f"RMSE: {metrics['rmse']:.4f}")
    print(f"MAE: {metrics['mae']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")

    print("\nAll visualizations have been saved.")
    
except Exception as e:
    # Top-level boundary: report the failure and keep the notebook alive
    print(f"Error during evaluation: {e}")
    import traceback
    traceback.print_exc()