# Neural Network Metrics Calculation

This notebook calculates comprehensive metrics for LSTM neural network models using the results saved from the Neural Network Modeling script. We compute both **Price Prediction (Regression)** and **Directional Forecasting (Classification)** metrics to match the performance comparison table.

---

# 1. Setup and Data Loading


In [1]:
# Core libraries
import numpy as np
import pandas as pd
import warnings
import os

# Metrics
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score,
    precision_score, recall_score, roc_auc_score, log_loss,
    accuracy_score, f1_score, confusion_matrix
)

# NOTE(review): this installs a blanket "ignore" filter, so every warning
# raised after this cell (e.g. numpy RuntimeWarnings from the metric code
# further down) is hidden. Consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Confirmation that the import cell ran to completion.
print("Libraries loaded successfully!")


Libraries loaded successfully!


In [2]:
# Read the four result files written by the LSTM modeling notebook.
# The order of the paths matches the order of the names on the left-hand side.
predictions_df, top10_metrics_df, summary_df, grid_search_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../results/lstm_predictions.csv',
        '../results/lstm_top10_model_metrics.csv',
        '../results/lstm_summary.csv',
        '../results/lstm_grid_search_results.csv',
    )
)

# Quick sanity overview of the predictions table.
print(f"LSTM Predictions Shape: {predictions_df.shape}")
print(f"\nColumns: {predictions_df.columns.tolist()}")
print("\nFirst 5 rows:")
predictions_df.head()


LSTM Predictions Shape: (182, 13)

Columns: ['Index', 'LSTM_Model_1', 'LSTM_Model_2', 'LSTM_Model_3', 'LSTM_Model_4', 'LSTM_Model_5', 'LSTM_Model_6', 'LSTM_Model_7', 'LSTM_Model_8', 'LSTM_Model_9', 'LSTM_Model_10', 'Ensemble_Prediction', 'Actual']

First 5 rows:


Unnamed: 0,Index,LSTM_Model_1,LSTM_Model_2,LSTM_Model_3,LSTM_Model_4,LSTM_Model_5,LSTM_Model_6,LSTM_Model_7,LSTM_Model_8,LSTM_Model_9,LSTM_Model_10,Ensemble_Prediction,Actual
0,0,69.42661,69.85628,68.98693,67.73339,66.55602,68.749214,68.04572,72.45026,66.96405,68.92935,68.76979,66.360001
1,1,69.29788,69.65344,68.70286,67.39886,66.18405,68.53883,67.90492,72.062775,66.575874,68.23689,68.45564,67.040001
2,2,69.52577,69.61963,68.08421,67.44733,66.03835,67.30228,68.73071,72.09659,66.076744,67.94525,68.28668,66.029999
3,3,69.01991,68.55527,67.66247,66.84239,64.8726,66.37115,68.122284,71.28057,65.43216,67.160576,67.53194,66.25
4,4,69.04501,68.595665,67.33832,66.958435,64.53384,66.129105,68.397514,71.27795,65.46219,66.94406,67.468216,67.68


In [3]:
# Print the saved LSTM summary as "Metric: Value" lines under a banner.
print("=" * 70, "LSTM MODEL SUMMARY", "=" * 70, sep="\n")
for metric_name, metric_value in zip(summary_df['Metric'], summary_df['Value']):
    print(f"{metric_name}: {metric_value}")


LSTM MODEL SUMMARY
Total Models Evaluated: 216.0
Top 10 Models Used: 10.0
Test Set Size: 182.0
Best Model RMSE: 1.775326415749567
Best Model R²: 0.7643108585478032
Ensemble RMSE: 3.646956269589488
Ensemble R²: 0.008215506318143


---

# 2. Price Prediction (Regression) Metrics

We calculate:
- **RMSE**: Root Mean Squared Error
- **MAPE**: Mean Absolute Percentage Error  
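- **AIC**: Akaike Information Criterion, approximated from the residual sum of squares as $n \ln(\mathrm{RSS}/n) + 2k$, where $k$ is an assumed (not measured) parameter count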
- **R²**: Coefficient of Determination


In [4]:
def calculate_regression_metrics(actual, predicted, n_params=None):
    """
    Calculate regression metrics for price prediction.

    Args:
        actual: Array-like of actual prices.
        predicted: Array-like of predicted prices, same shape as ``actual``.
        n_params: Number of model parameters ``k`` used in the AIC penalty
            term. When ``None`` a placeholder of 100 is used, so the
            resulting AIC is only meaningful relative to other models scored
            with the same placeholder.

    Returns:
        Dictionary with keys 'RMSE', 'MAPE (%)', 'AIC' and 'R²'.
        'MAPE (%)' is NaN when every actual value is zero; 'AIC' is NaN when
        the residual sum of squares is zero.

    Raises:
        ValueError: If the inputs differ in shape or are empty.
    """
    # Coerce to float ndarrays so lists / pandas Series behave like arrays:
    # boolean-mask indexing works and subtraction is positional rather than
    # index-aligned.
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    # Fail early: mismatched shapes would otherwise broadcast silently in the
    # residual computation below.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"actual and predicted must have the same shape, "
            f"got {actual.shape} and {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("actual and predicted must not be empty")

    n = len(actual)

    # RMSE
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)

    # MAPE, computed only over non-zero actuals to avoid division by zero
    nonzero = actual != 0
    if nonzero.any():
        pct_errors = np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
        mape = np.mean(pct_errors) * 100
    else:
        # No usable denominators: report NaN explicitly instead of taking the
        # mean of an empty array.
        mape = np.nan

    # R²
    r2 = r2_score(actual, predicted)

    # AIC (approximation using residual sum of squares)
    # AIC = n * ln(RSS/n) + 2k where k is number of parameters
    if n_params is None:
        # Placeholder parameter count; the true LSTM size is not known here.
        n_params = 100

    rss = np.sum((actual - predicted) ** 2)
    if rss > 0:
        aic = n * np.log(rss / n) + 2 * n_params
    else:
        # ln(0) is undefined for a perfect fit
        aic = np.nan

    return {
        'RMSE': rmse,
        'MAPE (%)': mape,
        'AIC': aic,
        'R²': r2
    }


In [5]:
# Ground-truth prices and the averaged ensemble forecast as NumPy arrays
actual = predictions_df['Actual'].to_numpy()
ensemble_pred = predictions_df['Ensemble_Prediction'].to_numpy()

# Column names holding the per-model forecasts
model_cols = list(
    filter(lambda name: name.startswith('LSTM_Model_'), predictions_df.columns)
)

print(f"Number of test samples: {len(actual)}")
print(f"Number of individual models: {len(model_cols)}")
print(f"\nActual price range: ${np.min(actual):.2f} - ${np.max(actual):.2f}")


Number of test samples: 182
Number of individual models: 10

Actual price range: $57.13 - $75.14


In [6]:
# Score every individual LSTM model against the actual prices
print("=" * 70, "INDIVIDUAL MODEL REGRESSION METRICS", "=" * 70, sep="\n")

individual_metrics = []

for model_name in model_cols:
    model_scores = calculate_regression_metrics(actual, predictions_df[model_name].values)
    # Added after the metric keys so 'Model' ends up as the last column
    model_scores['Model'] = model_name
    individual_metrics.append(model_scores)
    print(
        f"\n{model_name}:",
        f"  RMSE: {model_scores['RMSE']:.4f}",
        f"  MAPE: {model_scores['MAPE (%)']:.2f}%",
        f"  R²:   {model_scores['R²']:.4f}",
        sep="\n",
    )

# One row per model
individual_metrics_df = pd.DataFrame(individual_metrics)


INDIVIDUAL MODEL REGRESSION METRICS

LSTM_Model_1:
  RMSE: 4.6065
  MAPE: 6.47%
  R²:   -0.5824

LSTM_Model_2:
  RMSE: 5.1550
  MAPE: 7.44%
  R²:   -0.9816

LSTM_Model_3:
  RMSE: 4.2269
  MAPE: 5.88%
  R²:   -0.3323

LSTM_Model_4:
  RMSE: 2.4452
  MAPE: 3.10%
  R²:   0.5542

LSTM_Model_5:
  RMSE: 1.7750
  MAPE: 2.14%
  R²:   0.7651

LSTM_Model_6:
  RMSE: 3.0575
  MAPE: 3.73%
  R²:   0.3029

LSTM_Model_7:
  RMSE: 2.8859
  MAPE: 3.97%
  R²:   0.3789

LSTM_Model_8:
  RMSE: 8.0219
  MAPE: 12.22%
  R²:   -3.7985

LSTM_Model_9:
  RMSE: 3.7188
  MAPE: 5.21%
  R²:   -0.0313

LSTM_Model_10:
  RMSE: 3.5408
  MAPE: 5.00%
  R²:   0.0651


In [7]:
# Regression metrics for the averaged (ensemble) forecast
print("", "=" * 70, "ENSEMBLE MODEL REGRESSION METRICS", "=" * 70, sep="\n")

ensemble_reg_metrics = calculate_regression_metrics(actual, ensemble_pred)
print(
    "\nEnsemble Prediction:",
    f"  RMSE: {ensemble_reg_metrics['RMSE']:.4f}",
    f"  MAPE: {ensemble_reg_metrics['MAPE (%)']:.2f}%",
    f"  R²:   {ensemble_reg_metrics['R²']:.4f}",
    sep="\n",
)



ENSEMBLE MODEL REGRESSION METRICS

Ensemble Prediction:
  RMSE: 3.6470
  MAPE: 5.16%
  R²:   0.0082


In [8]:
# The best single model is the row with the smallest RMSE
best_single_idx = individual_metrics_df['RMSE'].idxmin()
best_single_model = individual_metrics_df.loc[best_single_idx]

print("", "=" * 70, "BEST SINGLE MODEL", "=" * 70, sep="\n")
print(
    f"\n{best_single_model['Model']}:",
    f"  RMSE: {best_single_model['RMSE']:.4f}",
    f"  MAPE: {best_single_model['MAPE (%)']:.2f}%",
    f"  R²:   {best_single_model['R²']:.4f}",
    sep="\n",
)



BEST SINGLE MODEL

LSTM_Model_5:
  RMSE: 1.7750
  MAPE: 2.14%
  R²:   0.7651


---

# 3. Directional Forecasting (Classification) Metrics

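For directional forecasting, we check whether the price moves **UP** or **DOWN** relative to the previous day. Directions are taken from consecutive differences within each series (actual vs. previous actual, predicted vs. previous predicted), and we report: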
- **Precision**: Of all predicted UP days, how many were actually UP?
- **Recall**: Of all actual UP days, how many did we predict?
- **AUC-ROC**: Area Under the ROC Curve
- **Log Loss**: Binary cross-entropy loss


In [9]:
def calculate_directional_metrics(actual, predicted):
    """
    Calculate directional forecasting (classification) metrics.

    A step counts as "up" (1) when a series increases relative to its own
    previous value, otherwise "down" (0). The actual direction comes from
    consecutive differences of ``actual`` and the predicted direction from
    consecutive differences of ``predicted``.

    The score used for AUC-ROC and Log Loss is the sigmoid of the predicted
    price change. It is a pseudo-probability that depends on the price scale,
    not a probability estimated by the model.

    Args:
        actual: Array-like of actual prices
        predicted: Array-like of predicted prices

    Returns:
        Dictionary with keys 'Precision', 'Recall', 'AUC-ROC', 'Log Loss' and
        'Accuracy'. Every value is NaN when the actual series does not
        contain both an up and a down move (including inputs with fewer than
        two points).
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    # Calculate actual and predicted directions (1 = up, 0 = down)
    # Direction is relative to previous value
    pred_changes = np.diff(predicted)
    actual_direction = (np.diff(actual) > 0).astype(int)
    predicted_direction = (pred_changes > 0).astype(int)

    # Degenerate case: with fewer than two classes in the ground truth the
    # metrics are undefined. Return the same keys as the normal path so that
    # callers reading e.g. 'Accuracy' do not hit a KeyError.
    if len(np.unique(actual_direction)) < 2:
        return {
            'Precision': np.nan,
            'Recall': np.nan,
            'AUC-ROC': np.nan,
            'Log Loss': np.nan,
            'Accuracy': np.nan
        }

    # Sigmoid of the predicted price change, used as confidence for "up"
    pred_proba = 1 / (1 + np.exp(-pred_changes))

    # Precision and Recall
    precision = precision_score(actual_direction, predicted_direction, zero_division=0)
    recall = recall_score(actual_direction, predicted_direction, zero_division=0)

    # AUC-ROC: invalid input (e.g. NaN scores) is reported as NaN. Only
    # ValueError is caught so interrupts and programming errors surface.
    try:
        auc_roc = roc_auc_score(actual_direction, pred_proba)
    except ValueError:
        auc_roc = np.nan

    # Log Loss
    try:
        # Clip probabilities to avoid log(0)
        pred_proba_clipped = np.clip(pred_proba, 1e-15, 1 - 1e-15)
        logloss = log_loss(actual_direction, pred_proba_clipped)
    except ValueError:
        logloss = np.nan

    return {
        'Precision': precision,
        'Recall': recall,
        'AUC-ROC': auc_roc,
        'Log Loss': logloss,
        'Accuracy': accuracy_score(actual_direction, predicted_direction)
    }


In [10]:
# Directional (up/down) metrics for every individual LSTM model
print("=" * 70, "INDIVIDUAL MODEL DIRECTIONAL METRICS", "=" * 70, sep="\n")

individual_dir_metrics = []

for model_name in model_cols:
    direction_scores = calculate_directional_metrics(actual, predictions_df[model_name].values)
    # Added after the metric keys so 'Model' ends up as the last column
    direction_scores['Model'] = model_name
    individual_dir_metrics.append(direction_scores)
    print(
        f"\n{model_name}:",
        f"  Precision: {direction_scores['Precision']:.2f}",
        f"  Recall:    {direction_scores['Recall']:.2f}",
        f"  AUC-ROC:   {direction_scores['AUC-ROC']:.2f}",
        f"  Log Loss:  {direction_scores['Log Loss']:.2f}",
        f"  Accuracy:  {direction_scores['Accuracy']:.2f}",
        sep="\n",
    )

# One row per model
individual_dir_metrics_df = pd.DataFrame(individual_dir_metrics)


INDIVIDUAL MODEL DIRECTIONAL METRICS

LSTM_Model_1:
  Precision: 0.51
  Recall:    0.47
  AUC-ROC:   0.48
  Log Loss:  0.75
  Accuracy:  0.50

LSTM_Model_2:
  Precision: 0.51
  Recall:    0.49
  AUC-ROC:   0.46
  Log Loss:  0.85
  Accuracy:  0.50

LSTM_Model_3:
  Precision: 0.43
  Recall:    0.37
  AUC-ROC:   0.42
  Log Loss:  0.80
  Accuracy:  0.43

LSTM_Model_4:
  Precision: 0.47
  Recall:    0.46
  AUC-ROC:   0.46
  Log Loss:  0.79
  Accuracy:  0.46

LSTM_Model_5:
  Precision: 0.49
  Recall:    0.46
  AUC-ROC:   0.47
  Log Loss:  0.86
  Accuracy:  0.48

LSTM_Model_6:
  Precision: 0.46
  Recall:    0.45
  AUC-ROC:   0.43
  Log Loss:  0.83
  Accuracy:  0.45

LSTM_Model_7:
  Precision: 0.52
  Recall:    0.49
  AUC-ROC:   0.48
  Log Loss:  0.80
  Accuracy:  0.51

LSTM_Model_8:
  Precision: 0.48
  Recall:    0.42
  AUC-ROC:   0.47
  Log Loss:  0.85
  Accuracy:  0.48

LSTM_Model_9:
  Precision: 0.48
  Recall:    0.45
  AUC-ROC:   0.47
  Log Loss:  0.78
  Accuracy:  0.47

LSTM_Model_10:
  

In [11]:
# Directional (up/down) metrics for the averaged ensemble forecast
print("", "=" * 70, "ENSEMBLE MODEL DIRECTIONAL METRICS", "=" * 70, sep="\n")

ensemble_dir_metrics = calculate_directional_metrics(actual, ensemble_pred)
print(
    "\nEnsemble Prediction:",
    f"  Precision: {ensemble_dir_metrics['Precision']:.2f}",
    f"  Recall:    {ensemble_dir_metrics['Recall']:.2f}",
    f"  AUC-ROC:   {ensemble_dir_metrics['AUC-ROC']:.2f}",
    f"  Log Loss:  {ensemble_dir_metrics['Log Loss']:.2f}",
    f"  Accuracy:  {ensemble_dir_metrics['Accuracy']:.2f}",
    sep="\n",
)



ENSEMBLE MODEL DIRECTIONAL METRICS

Ensemble Prediction:
  Precision: 0.47
  Recall:    0.42
  AUC-ROC:   0.45
  Log Loss:  0.80
  Accuracy:  0.46


---

# 4. Comprehensive Model Comparison Table

This section creates a summary table matching the format of our research paper's Table 1.


In [12]:
# Side-by-side summary: best single model vs. ensemble
print("=" * 100, "COMPREHENSIVE NEURAL NETWORK METRICS SUMMARY", "=" * 100, sep="\n")

# Directional metrics for the best (lowest-RMSE) single model
best_model_name = best_single_model['Model']
best_model_pred = predictions_df[best_model_name].values
best_single_dir = calculate_directional_metrics(actual, best_model_pred)

# Each column is a two-element list: [single best, ensemble].
# Regression columns come first, then the directional columns.
# NOTE(review): the AIC column is computed with calculate_regression_metrics'
# default parameter count, since no n_params is passed anywhere in this notebook.
nn_summary = {
    'Model': ['Neural Network (Single Best)', 'Neural Network (Ensemble)'],
}
nn_summary.update({
    reg_key: [best_single_model[reg_key], ensemble_reg_metrics[reg_key]]
    for reg_key in ('RMSE', 'MAPE (%)', 'AIC', 'R²')
})
nn_summary.update({
    dir_key: [best_single_dir[dir_key], ensemble_dir_metrics[dir_key]]
    for dir_key in ('Precision', 'Recall', 'AUC-ROC', 'Log Loss')
})

nn_summary_df = pd.DataFrame(nn_summary)
print("\nNeural Network Performance Summary:")
print(nn_summary_df.round(2).to_string(index=False))


COMPREHENSIVE NEURAL NETWORK METRICS SUMMARY

Neural Network Performance Summary:
                       Model  RMSE  MAPE (%)    AIC   R²  Precision  Recall  AUC-ROC  Log Loss
Neural Network (Single Best)  1.78      2.14 408.87 0.77       0.49    0.46     0.47      0.86
   Neural Network (Ensemble)  3.65      5.16 670.98 0.01       0.47    0.42     0.45      0.80


In [13]:
# Render the two neural-network rows in the fixed-width layout of Table 1
print("", "=" * 100, sep="\n")
print("TABLE 1 FORMAT: Performance Comparison of Machine Learning Models")
print("=" * 100)
print("", "-" * 100, sep="\n")
print(f"{'Model':<30} | {'RMSE':>8} {'MAPE (%)':>10} {'AIC':>10} {'R²':>8} | {'Precision':>10} {'Recall':>8} {'AUC-ROC':>10} {'Log Loss':>10}")
print("-" * 100)
# One row template for both models; the AIC cell is shown as 'N/A'.
print(
    *[
        f"{label:<30} | {reg['RMSE']:>8.2f} {reg['MAPE (%)']:>10.2f} {'N/A':>10} {reg['R²']:>8.2f} | "
        f"{cls['Precision']:>10.2f} {cls['Recall']:>8.2f} {cls['AUC-ROC']:>10.2f} {cls['Log Loss']:>10.2f}"
        for label, reg, cls in (
            ('Neural Network (Single Best)', best_single_model, best_single_dir),
            ('Neural Network (Ensemble)', ensemble_reg_metrics, ensemble_dir_metrics),
        )
    ],
    sep="\n",
)
print("-" * 100)



TABLE 1 FORMAT: Performance Comparison of Machine Learning Models

----------------------------------------------------------------------------------------------------
Model                          |     RMSE   MAPE (%)        AIC       R² |  Precision   Recall    AUC-ROC   Log Loss
----------------------------------------------------------------------------------------------------
Neural Network (Single Best)   |     1.78       2.14        N/A     0.77 |       0.49     0.46       0.47       0.86
Neural Network (Ensemble)      |     3.65       5.16        N/A     0.01 |       0.47     0.42       0.45       0.80
----------------------------------------------------------------------------------------------------


---

# 5. Save Results to CSV


In [14]:
# Join regression and directional metrics into one row per model
all_individual_metrics = pd.merge(
    individual_metrics_df,
    individual_dir_metrics_df,
    on='Model',
)

# Write the per-model table and the two-row summary next to the LSTM results
for metrics_frame, csv_path in (
    (all_individual_metrics, '../results/nn_individual_model_metrics.csv'),
    (nn_summary_df, '../results/nn_summary_metrics.csv'),
):
    metrics_frame.to_csv(csv_path, index=False)
    print(f"Saved: {csv_path}")

print("\nAll neural network metrics exported successfully!")


Saved: ../results/nn_individual_model_metrics.csv
Saved: ../results/nn_summary_metrics.csv

All neural network metrics exported successfully!


In [15]:
# Closing recap: headline numbers for the best single model and the ensemble
print("", "=" * 70, "FINAL SUMMARY", "=" * 70, sep="\n")

print(
    f"\nBest Single Model: {best_single_model['Model']}",
    f"  - RMSE: {best_single_model['RMSE']:.4f}",
    f"  - MAPE: {best_single_model['MAPE (%)']:.2f}%",
    f"  - R²: {best_single_model['R²']:.4f}",
    f"  - Directional Accuracy: {best_single_dir['Accuracy']*100:.1f}%",
    sep="\n",
)

print(
    "\nEnsemble Model (Average of Top 10):",
    f"  - RMSE: {ensemble_reg_metrics['RMSE']:.4f}",
    f"  - MAPE: {ensemble_reg_metrics['MAPE (%)']:.2f}%",
    f"  - R²: {ensemble_reg_metrics['R²']:.4f}",
    f"  - Directional Accuracy: {ensemble_dir_metrics['Accuracy']*100:.1f}%",
    sep="\n",
)



FINAL SUMMARY

Best Single Model: LSTM_Model_5
  - RMSE: 1.7750
  - MAPE: 2.14%
  - R²: 0.7651
  - Directional Accuracy: 48.1%

Ensemble Model (Average of Top 10):
  - RMSE: 3.6470
  - MAPE: 5.16%
  - R²: 0.0082
  - Directional Accuracy: 46.4%
