# 05. Model Evaluation and Comparison

**Objective:** Compare all forecasting models and generate comprehensive evaluation results

**Models Compared:**
- **Econometric:** Random Walk, VAR, ARIMA
- **Linear ML:** LASSO, Ridge, Elastic Net, LARS
- **Nonlinear ML:** Random Forest, SVR, XGBoost

**Evaluation:**
- Performance metrics (RMSFE, MAPE)
- Visual comparisons
- Summary statistics by category
- Export results for LaTeX report

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Custom metrics
import sys
sys.path.append('../')
from utils.metrics import rmsfe, mape

# Paths
RESULTS_PATH = Path('../results')
TABLES_PATH = RESULTS_PATH / 'tables'
FIGURES_PATH = RESULTS_PATH / 'figures'

# Create the output folders up front: the to_csv / savefig / open(..., 'w')
# calls further down raise FileNotFoundError if their target folder is missing.
for output_dir in (TABLES_PATH, FIGURES_PATH):
    output_dir.mkdir(parents=True, exist_ok=True)

# Set style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)

## 1. Load All Results

In [None]:
# Read the three per-family result tables written by the earlier notebooks
result_files = (
    'econometric_results.csv',
    'linear_ml_results.csv',
    'nonlinear_ml_results.csv',
)
econometric_results, linear_ml_results, nonlinear_ml_results = (
    pd.read_csv(TABLES_PATH / file_name) for file_name in result_files
)

# Stack them into a single frame with a fresh 0..n-1 index
all_results = pd.concat(
    [econometric_results, linear_ml_results, nonlinear_ml_results],
    ignore_index=True,
)

# Tag each row with its model family; any model name not listed here is
# treated as Nonlinear ML.
model_category = {
    **dict.fromkeys(['Random Walk', 'ARIMA', 'VAR'], 'Econometric'),
    **dict.fromkeys(['LASSO', 'Ridge', 'Elastic Net', 'LARS'], 'Linear ML'),
}
all_results['category'] = [
    model_category.get(model_name, 'Nonlinear ML')
    for model_name in all_results['model']
]

print("All Models Performance:")
print(all_results)

## 2. Performance Comparison Table

In [None]:
# Rank every model from lowest to highest RMSFE
all_results_sorted = all_results.sort_values(by='RMSFE', ascending=True)

rule = "=" * 70
ranking_text = all_results_sorted.to_string(index=False)
print(f"\n{rule}")
print("COMPLETE MODEL PERFORMANCE RANKING")
print(rule)
print(ranking_text)
print(rule)

# Persist the ranked table alongside the other result tables
ranking_file = TABLES_PATH / 'all_models_results.csv'
all_results_sorted.to_csv(ranking_file, index=False)
print("\n✓ Complete results saved")

In [None]:
# For each category, find the row label with the smallest RMSFE, then pull
# those rows out of the combined frame.
best_row_labels = all_results.groupby('category')['RMSFE'].idxmin()
best_by_category = all_results.loc[best_row_labels]

report_columns = ['category', 'model', 'RMSFE', 'MAPE']
print("\nBest Model in Each Category:")
print(best_by_category[report_columns].to_string(index=False))

## 3. Visualization - Performance Comparison

In [None]:
# Horizontal bar charts, one bar per model: RMSFE on the left, MAPE on the right
from matplotlib.patches import Patch

category_colors = {
    'Econometric': '#1f77b4',
    'Linear ML': '#ff7f0e',
    'Nonlinear ML': '#2ca02c',
}

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# One colour per bar, following the sorted row order; anything that is not
# Econometric or Linear ML gets the Nonlinear ML colour.
colors = [
    category_colors.get(cat, '#2ca02c')
    for cat in all_results_sorted['category']
]

panels = (
    (ax1, 'RMSFE', 'RMSFE (Lower is Better)', 'Model Comparison - RMSFE'),
    (ax2, 'MAPE', 'MAPE (%) (Lower is Better)', 'Model Comparison - MAPE'),
)
for axis, metric, x_label, title in panels:
    axis.barh(all_results_sorted['model'], all_results_sorted[metric], color=colors)
    axis.set_xlabel(x_label, fontsize=12)
    axis.set_title(title, fontsize=14, fontweight='bold')
    # Flip the axis so the first (lowest-RMSFE) row is drawn at the top
    axis.invert_yaxis()

# Shared figure-level legend keyed by category colour
legend_elements = [
    Patch(facecolor=color, label=category)
    for category, color in category_colors.items()
]
fig.legend(handles=legend_elements, loc='upper center', ncol=3, fontsize=11)

# Reserve the top 4% of the figure for the legend
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig(FIGURES_PATH / 'all_models_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Box plots of each metric grouped by model category
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

boxplot_panels = (
    (ax1, 'RMSFE', 'RMSFE by Model Category', 'RMSFE'),
    (ax2, 'MAPE', 'MAPE by Model Category', 'MAPE (%)'),
)
for axis, metric, title, y_label in boxplot_panels:
    all_results.boxplot(column=metric, by='category', ax=axis)
    axis.set_title(title)
    axis.set_xlabel('Model Category')
    axis.set_ylabel(y_label)
    # Make this axes current so plt.xticks rotates its labels
    plt.sca(axis)
    plt.xticks(rotation=45)

# Blank out the figure-level title
plt.suptitle('')
plt.tight_layout()
plt.savefig(FIGURES_PATH / 'category_comparison_boxplot.png', dpi=300, bbox_inches='tight')
plt.show()

## 4. Generate LaTeX Table

In [None]:
# Generate LaTeX table for paper

# Single-pass translation table for characters LaTeX treats specially in text.
# str.translate maps each input character once, so the backslashes introduced
# by one replacement are never re-escaped by another.
_LATEX_SPECIALS = str.maketrans({
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
})


def _latex_escape(value):
    """Return ``str(value)`` with LaTeX special characters escaped."""
    return str(value).translate(_LATEX_SPECIALS)


def generate_latex_table(df, caption="Model Performance Comparison", label="tab:model_comparison"):
    """
    Generate a LaTeX table from results dataframe

    Args:
        df: DataFrame with 'category', 'model', 'RMSFE' and 'MAPE' columns;
            one table row is written per DataFrame row, in the given order.
        caption: Text placed inside ``\\caption{...}``. Inserted verbatim, so
            it may contain LaTeX markup.
        label: Text placed inside ``\\label{...}``. Inserted verbatim.

    Returns:
        The complete ``table`` environment as a newline-terminated string.
        RMSFE is written with 4 decimals and MAPE with 2. Category and model
        text is escaped so characters such as ``_``, ``&`` or ``%`` do not
        break the table.
    """
    lines = [
        "\\begin{table}[htbp]",
        "  \\centering",
        f"  \\caption{{{caption}}}",
        f"  \\label{{{label}}}",
        "  \\begin{tabular}{llcc}",
        "    \\hline",
        r"    \textbf{Category} & \textbf{Model} & \textbf{RMSFE} & \textbf{MAPE (\%)} \\",
        "    \\hline",
    ]

    columns = zip(df['category'], df['model'], df['RMSFE'], df['MAPE'])
    for category, model, rmsfe_value, mape_value in columns:
        lines.append(
            f"    {_latex_escape(category)} & {_latex_escape(model)}"
            f" & {float(rmsfe_value):.4f} & {float(mape_value):.2f} \\\\"
        )

    lines += [
        "    \\hline",
        "  \\end{tabular}",
        "\\end{table}",
    ]

    # Every line, including the last, ends with a newline
    return "\n".join(lines) + "\n"

# Render the RMSFE-ranked results as a LaTeX table
latex_table = generate_latex_table(all_results_sorted)

# Write it next to the CSV tables
latex_file = TABLES_PATH / 'model_comparison_latex.tex'
latex_file.write_text(latex_table)

banner = "=" * 70
print("LaTeX Table Generated:")
print(banner)
print(latex_table)
print(banner)
print("\n✓ LaTeX table saved to results/tables/model_comparison_latex.tex")

## 5. Summary Statistics

In [None]:
# Mean, standard deviation and range of each metric within every category
summary_stats = ['mean', 'std', 'min', 'max']
category_summary = (
    all_results
    .groupby('category')
    .agg({metric: summary_stats for metric in ('RMSFE', 'MAPE')})
    .round(4)
)

separator = "=" * 70
print("\nPerformance Summary by Category:")
print(separator)
print(category_summary)
print(separator)

summary_file = TABLES_PATH / 'category_summary.csv'
category_summary.to_csv(summary_file)
print("\n✓ Category summary saved")

In [None]:
# The ranked frame is sorted by ascending RMSFE, so its first row is the winner
best_model = all_results_sorted.iloc[0]

divider = "=" * 70
best_model_report = [
    "\n" + divider,
    "OVERALL BEST MODEL",
    divider,
    f"Model: {best_model['model']}",
    f"Category: {best_model['category']}",
    f"RMSFE: {best_model['RMSFE']:.4f}",
    f"MAPE: {best_model['MAPE']:.2f}%",
    divider,
]
for report_line in best_model_report:
    print(report_line)

## 6. Key Insights

In [None]:
# Generate insights: a short plain-text summary that is printed and saved
insights = []

# Best model
insights.append(f"1. Best Overall Model: {best_model['model']} (RMSFE: {best_model['RMSFE']:.4f})")

# Best in each category
for _, row in best_by_category.iterrows():
    insights.append(f"   - Best {row['category']}: {row['model']} (RMSFE: {row['RMSFE']:.4f})")

# Category performance, best (lowest average RMSFE) first
avg_rmsfe = all_results.groupby('category')['RMSFE'].mean().sort_values()
insights.append("\n2. Category Performance (avg RMSFE):")
# The loop variable must not be called `rmsfe`: that would rebind the
# `rmsfe` metric function imported from utils.metrics to a float.
for cat, category_rmsfe in avg_rmsfe.items():
    insights.append(f"   - {cat}: {category_rmsfe:.4f}")

# Model count
insights.append(f"\n3. Total models evaluated: {len(all_results)}")

print("\n" + "="*70)
print("KEY INSIGHTS")
print("="*70)
for insight in insights:
    print(insight)
print("="*70)

# Save insights
with open(RESULTS_PATH / 'key_insights.txt', 'w') as f:
    f.write("\n".join(insights))

print("\n✓ Key insights saved to results/key_insights.txt")

## Summary

**Evaluation completed:**
- ✓ All models compared
- ✓ Performance metrics (RMSFE, MAPE) loaded and compared
- ✓ Visualizations created
- ✓ LaTeX table generated for paper
- ✓ Key insights identified

**Files generated:**
- `results/tables/all_models_results.csv`
- `results/tables/category_summary.csv`
- `results/tables/model_comparison_latex.tex`
- `results/figures/all_models_comparison.png`
- `results/figures/category_comparison_boxplot.png`
- `results/key_insights.txt`

**Next steps:**
- Use these results to write your paper
- Include the LaTeX table in your report
- Analyze the best-performing models further
- Consider ensemble methods combining top models