# 📊 Ergebnisübersicht - Solar Energie Vorhersage

**Projekt:** Advanced Time Series Prediction  
**Datenquelle:** SMARD API (Filter 4068 - Korrigierte Solar-Daten)  
**Zeitraum:** 2022-01-01 bis 2024-12-31  

---

Dieses Notebook lädt alle gespeicherten Ergebnisse und zeigt sie übersichtlich an.  
Sie können es jederzeit ausführen, um die aktuellen Modellperformance-Metriken zu sehen.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Locations of the persisted evaluation artefacts, relative to this notebook.
RESULTS_DIR = Path('..', 'results', 'metrics')
FIGURES_DIR = Path('..', 'results', 'figures')

# Global plot appearance. The palette is set after the style sheet on purpose:
# both touch the colour cycle, and the later call wins.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

print("‚úÖ Setup abgeschlossen")

## 1️⃣ Baseline Models

Einfache statistische Modelle als Benchmark

In [None]:
# Load the stored baseline metrics and rank the models by test RMSE
# (ascending, so the best model ends up in the first row).
baseline = (
    pd.read_csv(RESULTS_DIR / 'solar_baseline_results.csv', index_col=0)
    .sort_values('test_rmse')
)

baseline_rule = "=" * 80
print(baseline_rule)
print("BASELINE MODELS - ERGEBNISSE")
print(baseline_rule)

# Reversed red-yellow-green map: small errors are shaded green, large ones red.
baseline_table = baseline.round(2).style.background_gradient(
    cmap='RdYlGn_r',
    subset=['test_rmse', 'test_mae'],
)
display(baseline_table)

# Thanks to the sort above, row 0 is the model with the lowest test RMSE.
baseline_winner_name = baseline.index[0]
baseline_winner = baseline.iloc[0]
print(f"\nüèÜ Bestes Baseline-Modell: {baseline_winner_name}")
print(f"   RMSE: {baseline_winner['test_rmse']:.2f} MW")
print(f"   MAPE: {baseline_winner['test_mape']:.2f}%")
print(f"   R¬≤:   {baseline_winner['test_r2']:.4f}")

## 2️⃣ Machine Learning Tree Models

Gradient Boosting und Ensemble-Methoden

In [None]:
# Read the tree-model metrics; the first CSV column becomes the index.
ml_tree_raw = pd.read_csv(RESULTS_DIR / 'solar_ml_tree_results.csv', index_col=0)
# Rank by test RMSE so that the best model is the first row.
ml_tree = ml_tree_raw.sort_values('test_rmse')

for header_line in ("=" * 80, "MACHINE LEARNING TREE MODELS - ERGEBNISSE", "=" * 80):
    print(header_line)

# Shade the error columns: green for small values, red for large ones.
display(
    ml_tree.round(2).style.background_gradient(
        cmap='RdYlGn_r',
        subset=['test_rmse', 'test_mae'],
    )
)

# Headline numbers of the top-ranked (lowest test RMSE) model.
ml_winner_name, ml_winner = ml_tree.index[0], ml_tree.iloc[0]
print(f"\nüèÜ Bestes ML-Modell: {ml_winner_name}")
print(f"   RMSE: {ml_winner['test_rmse']:.2f} MW")
print(f"   MAPE: {ml_winner['test_mape']:.2f}%")
print(f"   R¬≤:   {ml_winner['test_r2']:.4f}")

## 3️⃣ Vergleich: Baseline vs. ML Models

Visualisierung der Verbesserung durch ML-Modelle

In [None]:
# Combine both result tables and tag every model with its family so the bars
# can be colour-coded by category.
all_results = pd.concat([
    baseline.assign(category='Baseline'),
    ml_tree.assign(category='ML Tree')
])


def _plot_metric_comparison(ax, results, metric, title, xlabel, ascending=True):
    """Draw one horizontal bar chart comparing all models on a single metric.

    Args:
        ax: Matplotlib axes to draw on.
        results: DataFrame indexed by model name with the column ``metric``
            and a ``category`` column ('Baseline' or anything else).
        metric: Name of the column to plot.
        title: Axes title.
        xlabel: Label of the value axis.
        ascending: Sort direction applied to ``metric`` before plotting.
    """
    ranked = results.sort_values(metric, ascending=ascending)
    bar_colors = [
        'lightcoral' if category == 'Baseline' else 'lightgreen'
        for category in ranked['category']
    ]
    # Plot the Series rather than the DataFrame:
    #  * the index (model names) is used for the bar labels automatically, so
    #    no `x=` is needed -- `x` expects a column label/position and fails
    #    when handed the index values;
    #  * a colour list is applied bar by bar for a Series, whereas a DataFrame
    #    plot assigns colours per column.
    ranked[metric].plot(kind='barh', ax=ax, color=bar_colors, legend=False)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel(xlabel)


# 2x2 grid: one panel per metric.
fig, axes = plt.subplots(2, 2, figsize=(16, 10))

# RMSE
_plot_metric_comparison(
    axes[0, 0], all_results, 'test_rmse',
    'RMSE Vergleich (niedriger = besser)', 'RMSE [MW]',
)

# MAPE
_plot_metric_comparison(
    axes[0, 1], all_results, 'test_mape',
    'MAPE Vergleich (niedriger = besser)', 'MAPE [%]',
)

# R² (sorted descending because higher is better), with a reference line at 0.9
_plot_metric_comparison(
    axes[1, 0], all_results, 'test_r2',
    'R¬≤ Score Vergleich (h√∂her = besser)', 'R¬≤ Score',
    ascending=False,
)
threshold_line = axes[1, 0].axvline(
    0.9, color='red', linestyle='--', alpha=0.5, label='0.9 Threshold'
)
# Restrict the legend to the threshold line; the bars are identified by colour.
axes[1, 0].legend(handles=[threshold_line])

# MAE
_plot_metric_comparison(
    axes[1, 1], all_results, 'test_mae',
    'MAE Vergleich (niedriger = besser)', 'MAE [MW]',
)

plt.tight_layout()
plt.show()

# Relative improvement of the best ML model over the best baseline model.
# Both tables are sorted by test RMSE where they are loaded, so row 0 is the
# best model of each family.
print("\nüìà Verbesserung durch ML-Modelle:")
best_baseline = baseline.iloc[0]
best_ml = ml_tree.iloc[0]
print(f"   RMSE-Verbesserung: {(1 - best_ml['test_rmse']/best_baseline['test_rmse'])*100:.1f}%")
print(f"   MAPE-Verbesserung: {(1 - best_ml['test_mape']/best_baseline['test_mape'])*100:.1f}%")
print(f"   R¬≤-Verbesserung:   {best_baseline['test_r2']:.4f} ‚Üí {best_ml['test_r2']:.4f}")

## 4️⃣ Feature Importance (ML-Modelle)

Welche Features sind am wichtigsten für die Vorhersage?

In [None]:
try:
    # The first CSV column (presumably the feature name -- confirm against the
    # file) becomes the index and therefore the bar labels below.
    feature_imp = pd.read_csv(RESULTS_DIR / 'solar_feature_importance.csv', index_col=0)
    # First 15 rows as stored; the file is not re-sorted here.
    leading_features = feature_imp.head(15)

    importance_rule = "=" * 80
    print(importance_rule)
    print("TOP 15 WICHTIGSTE FEATURES")
    print(importance_rule)
    display(leading_features)

    # Horizontal bar chart of the same 15 rows.
    imp_fig, imp_ax = plt.subplots(figsize=(12, 6))
    leading_features.plot(kind='barh', y='importance', ax=imp_ax, color='steelblue')
    imp_ax.set_title('Top 15 Feature Importance (LightGBM)', fontsize=14, fontweight='bold')
    imp_ax.set_xlabel('Importance Score')
    # barh draws the first row at the bottom; flip so it appears at the top.
    imp_ax.invert_yaxis()
    imp_fig.tight_layout()
    plt.show()

except FileNotFoundError:
    # The importance file is optional; report its absence instead of failing.
    print("‚ö†Ô∏è Feature Importance Datei nicht gefunden")

## 5️⃣ Zusammenfassung

Key Takeaways aus der Analyse

In [None]:
summary_rule = "=" * 80
print(summary_rule)
print("ZUSAMMENFASSUNG - SOLAR ENERGIE VORHERSAGE")
print(summary_rule)

print("\nüéØ BESTE MODELLE:")
# ml_tree is sorted by test RMSE where it is loaded, so its first rows are the
# best models. head(3) simply yields fewer rows when fewer than three models
# were evaluated, instead of failing on a missing position.
for rank, (model_name, metrics) in enumerate(ml_tree.head(3).iterrows(), start=1):
    print(f"\n{rank}. {model_name} (ML Tree)")
    print(f"   ‚Ä¢ RMSE: {metrics['test_rmse']:.2f} MW")
    print(f"   ‚Ä¢ MAPE: {metrics['test_mape']:.2f}%")
    print(f"   ‚Ä¢ R¬≤:   {metrics['test_r2']:.4f}")

# NOTE(review): everything below is static text. The figures (row count,
# feature count, R², RMSE reduction, MAPE) are not derived from the loaded
# result files and must be updated by hand when the results change.
print("\n\nüìä DATENQUALIT√ÑT:")
print("   ‚úÖ Korrekte Datenquelle: SMARD API Filter 4068")
print("   ‚úÖ Zeitraum: 2022-01-01 bis 2024-12-31 (3 Jahre)")
print("   ‚úÖ Datenpunkte: 26.257 st√ºndliche Messungen")
print("   ‚úÖ Features: 31 (Lag, Rolling, Zeit-Features)")

print("\n\nüîë WICHTIGSTE ERKENNTNISSE:")
print("   1. ML Tree-Modelle erreichen R¬≤ > 0.98 (exzellent!)")
print("   2. Top-Features: lag_1, lag_2, hour (Tag/Nacht-Zyklus)")
print("   3. Verbesserung gegen√ºber Baseline: ~89% weniger RMSE")
print("   4. MAPE < 4% zeigt hohe Vorhersagegenauigkeit")

print("\n" + "="*80)

## 6️⃣ Export für Präsentation

Speichern Sie die Ergebnistabelle als schönes Format für Präsentationen

In [None]:
# Build the presentation table: up to three baseline models followed by up to
# four tree models (both tables are sorted by test RMSE where they are loaded).
top_baseline = baseline.head(3)
top_ml_tree = ml_tree.head(4)
summary = pd.concat([top_baseline, top_ml_tree]).round(2)

# Derive the labels from the actual slice lengths so the assignment still
# matches the row count when a table holds fewer models than requested.
summary['category'] = ['Baseline'] * len(top_baseline) + ['ML Tree'] * len(top_ml_tree)

csv_path = RESULTS_DIR / 'PRESENTATION_SUMMARY.csv'
xlsx_path = RESULTS_DIR / 'PRESENTATION_SUMMARY.xlsx'

# Always write the CSV.
summary.to_csv(csv_path)

# The Excel export is optional: it needs openpyxl, so fall back to a hint
# when that package is not installed.
try:
    with pd.ExcelWriter(xlsx_path, engine='openpyxl') as writer:
        summary.to_excel(writer, sheet_name='Results')
except ImportError:
    print("‚úÖ CSV gespeichert (f√ºr Excel: pip install openpyxl)")
    print(f"   ‚Ä¢ {csv_path}")
else:
    print("‚úÖ Ergebnisse gespeichert als:")
    print(f"   ‚Ä¢ {csv_path}")
    print(f"   ‚Ä¢ {xlsx_path}")

# Show the table with the error columns shaded (green = small, red = large).
display(summary.style.background_gradient(cmap='RdYlGn_r', subset=['test_rmse', 'test_mape', 'test_mae']))