# 🚀 LSTM Optimization - Google Colab GPU Edition

**Systematisches LSTM-Tuning mit GPU-Beschleunigung**

**Setup:**
- Runtime → Change runtime type → GPU (T4 oder A100)
- ~10-50x schneller als CPU!

In [None]:
# Check GPU
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("GPU Available:", tf.config.list_physical_devices('GPU'))
print("\nüöÄ GPU should show above!")

In [None]:
# Clone Repository
!git clone https://github.com/chradden/AdvancedTimeSeriesPrediction.git
%cd AdvancedTimeSeriesPrediction/energy-timeseries-project

In [None]:
# Install Dependencies
!pip install -q pandas numpy matplotlib seaborn scikit-learn tensorflow keras

In [None]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import time

# Set seeds so NumPy and TensorFlow random draws start from a fixed state
np.random.seed(42)
tf.random.set_seed(42)

# GPU Config: request on-demand memory growth on every GPU TensorFlow can see
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"‚úÖ GPU configured: {len(gpus)} device(s)")
    except RuntimeError as e:
        # Best effort: a rejected memory-growth request is only reported,
        # the notebook keeps running with TensorFlow's default behaviour
        print(e)
else:
    print("‚ö†Ô∏è No GPU found - training will be slow!")

## ⚙️ Configuration - Zeitreihe auswählen

**Ändere hier die Zeitreihe:**
- `'solar'` - Solarenergie-Erzeugung
- `'wind_offshore'` - Offshore-Windenergie  
- `'wind_onshore'` - Onshore-Windenergie
- `'price'` - Strompreise
- `'consumption'` - Energieverbrauch

Oder setze `RUN_ALL_SERIES = True` um alle Zeitreihen nacheinander zu verarbeiten!

In [None]:
# ============================================================================
# CONFIGURATION - SELECT THE TIME SERIES
# ============================================================================

# Series names this notebook knows about
AVAILABLE_SERIES = ['solar', 'wind_offshore', 'wind_onshore', 'price', 'consumption']

# Option 1 (default): a single series
# Change here: 'solar', 'wind_offshore', 'wind_onshore', 'price', 'consumption'
SERIES_NAME = 'solar'

# Option 2: process every series one after another (set to True for batch mode)
RUN_ALL_SERIES = False

# Resolve which series the notebook will work on
series_to_process = AVAILABLE_SERIES if RUN_ALL_SERIES else [SERIES_NAME]

if not RUN_ALL_SERIES:
    print(f"üìä MODE: Einzelne Zeitreihe")
    print(f"   ‚Üí Serie: {SERIES_NAME}")
else:
    print("üîÑ MODE: Alle Zeitreihen werden nacheinander verarbeitet")
    print(f"   ‚Üí {len(series_to_process)} Serien: {', '.join(series_to_process)}")

print(f"\n‚úÖ Konfiguration abgeschlossen!")

In [None]:
# Series used by the single-run cells below (in batch mode this is still SERIES_NAME)
series_name = SERIES_NAME if RUN_ALL_SERIES else series_to_process[0]

# Read the pre-split train / validation / test CSV files of that series
processed_prefix = f'data/processed/{series_name}'
train_df = pd.read_csv(f'{processed_prefix}_train.csv')
val_df = pd.read_csv(f'{processed_prefix}_val.csv')
test_df = pd.read_csv(f'{processed_prefix}_test.csv')

# Short summary: split sizes and the first ten column names
print(
    f"üìÇ Loading data for: {series_name.upper()}",
    f"   Train: {len(train_df)} | Val: {len(val_df)} | Test: {len(test_df)}",
    f"   Columns: {train_df.columns.tolist()[:10]}...",
    sep="\n",
)

In [None]:
# Determine value (target) column
known_target_cols = ['solar', 'price', 'value',
                     'wind_offshore', 'wind_onshore', 'consumption']

# Look for the column named after the selected series first, then the generic
# 'value' column, and only then the other known names. Picking simply the first
# known name in column order could select another series' column (present as a
# feature) as the target.
preferred_order = [series_name, 'value'] + [
    c for c in known_target_cols if c not in (series_name, 'value')
]
candidate_cols = [c for c in preferred_order if c in train_df.columns]
if not candidate_cols:
    # Fail with a readable message instead of a bare IndexError
    raise ValueError(
        f"No target column found for series '{series_name}'; "
        f"expected one of {preferred_order} in {train_df.columns.tolist()}"
    )
value_col = candidate_cols[0]

# Everything except the timestamp and the target is used as a model feature
feature_cols = [c for c in train_df.columns if c not in ['timestamp', value_col]]

print(f"Value column: {value_col}")
print(f"Features: {len(feature_cols)}")

## 🔧 Prepare Data

In [None]:
def create_sequences(data, target, seq_length):
    """Build sliding windows and their next-step targets for an LSTM.

    Window ``k`` holds ``data[k:k + seq_length]`` and is paired with
    ``target[k + seq_length]``, i.e. the value right after the window.

    Args:
        data: Sliceable sequence of feature rows.
        target: Indexable sequence of target values, aligned with ``data``.
        seq_length: Number of consecutive rows per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with ``len(data) - seq_length``
        entries each (both empty when ``data`` is not longer than
        ``seq_length``).
    """
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    next_values = [target[start + seq_length] for start in range(n_windows)]
    return np.array(windows), np.array(next_values)

# Scale data
# Both scalers are fitted on the training split only; validation and test are
# transformed with those training statistics.
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train = scaler_X.fit_transform(train_df[feature_cols])
y_train = scaler_y.fit_transform(train_df[[value_col]])

X_val = scaler_X.transform(val_df[feature_cols])
y_val = scaler_y.transform(val_df[[value_col]])

X_test = scaler_X.transform(test_df[feature_cols])
# The test target stays unscaled: predictions are inverse-transformed before scoring
y_test_orig = test_df[value_col].values

print(f"‚úÖ Data scaled: X_train shape = {X_train.shape}")

## 🧪 Experiment 1: Baseline LSTM

In [None]:
# Rows per input window (24 h if the data is hourly — TODO confirm)
seq_length = 24

X_train_seq, y_train_seq = create_sequences(X_train, y_train.flatten(), seq_length)
X_val_seq, y_val_seq = create_sequences(X_val, y_val.flatten(), seq_length)
# Only the test windows are needed here, so a zero array stands in for the target
X_test_seq, _ = create_sequences(X_test, np.zeros(len(X_test)), seq_length)
# Unscaled test targets; the first seq_length rows have no complete window before them
y_test_seq = y_test_orig[seq_length:]

print(f"Sequences: Train={X_train_seq.shape}, Val={X_val_seq.shape}, Test={X_test_seq.shape}")

In [None]:
# Build Baseline LSTM: two stacked LSTM layers with dropout, then a small dense head
model = keras.Sequential()
model.add(
    layers.LSTM(
        64,
        return_sequences=True,  # hand the full sequence to the second LSTM
        input_shape=(seq_length, X_train.shape[1]),
    )
)
model.add(layers.Dropout(0.2))
model.add(layers.LSTM(32))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))  # single regression output

model.compile(loss='mse', optimizer='adam', metrics=['mae'])
model.summary()

In [None]:
# Train
# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Halve the learning rate after 5 epochs without val_loss improvement (floor: 1e-6)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

# Wall-clock time of the whole fit call is recorded for the model comparison
start = time.time()
history = model.fit(
    X_train_seq, y_train_seq,
    validation_data=(X_val_seq, y_val_seq),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)
train_time = time.time() - start

print(f"\n‚úÖ Training completed in {train_time:.1f}s ({train_time/60:.1f} min)")

In [None]:
# Plot training history: loss in the left panel, MAE in the right panel
history_panels = [
    # (history key, axis/legend label, panel title)
    ('loss', 'Loss', 'Training History'),
    ('mae', 'MAE', 'MAE History'),
]

plt.figure(figsize=(14, 5))

for panel_no, (metric_key, metric_label, panel_title) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_no)
    # Training curve and the matching validation curve ('val_' + key)
    plt.plot(history.history[metric_key], label=f'Train {metric_label}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Val {metric_label}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_label)
    plt.legend()
    plt.title(panel_title)
    plt.grid(alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Evaluate
# Predict in scaled space, then map the predictions back to the original target scale
y_pred_scaled = model.predict(X_test_seq).flatten()
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# Metrics are computed against the unscaled test targets
rmse = np.sqrt(mean_squared_error(y_test_seq, y_pred))
mae = mean_absolute_error(y_test_seq, y_pred)
r2 = r2_score(y_test_seq, y_pred)

print(f"\nüìä BASELINE LSTM RESULTS:")
print(f"   R¬≤ = {r2:.4f}")
print(f"   RMSE = {rmse:.2f}")
print(f"   MAE = {mae:.2f}")

## 🚀 Experiment 2: Bi-LSTM

In [None]:
# Build Bi-LSTM: both recurrent layers are wrapped in Bidirectional,
# followed by the same dropout and dense head as the baseline cell
model_bilstm = keras.Sequential()
model_bilstm.add(
    layers.Bidirectional(
        layers.LSTM(64, return_sequences=True),
        input_shape=(seq_length, X_train.shape[1]),
    )
)
model_bilstm.add(layers.Dropout(0.2))
model_bilstm.add(layers.Bidirectional(layers.LSTM(32)))
model_bilstm.add(layers.Dropout(0.2))
model_bilstm.add(layers.Dense(32, activation='relu'))
model_bilstm.add(layers.Dense(1))  # single regression output

model_bilstm.compile(loss='mse', optimizer='adam', metrics=['mae'])
model_bilstm.summary()

In [None]:
# Train Bi-LSTM
# Same data, epochs, batch size and callbacks as the baseline run;
# early_stop and reduce_lr are the callback objects created in the baseline training cell
start = time.time()
history_bilstm = model_bilstm.fit(
    X_train_seq, y_train_seq,
    validation_data=(X_val_seq, y_val_seq),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)
train_time_bilstm = time.time() - start

print(f"\n‚úÖ Bi-LSTM training completed in {train_time_bilstm:.1f}s ({train_time_bilstm/60:.1f} min)")

In [None]:
# Evaluate Bi-LSTM
# Predict in scaled space, then map back to the original target scale
y_pred_bilstm_scaled = model_bilstm.predict(X_test_seq).flatten()
y_pred_bilstm = scaler_y.inverse_transform(y_pred_bilstm_scaled.reshape(-1, 1)).flatten()

rmse_bilstm = np.sqrt(mean_squared_error(y_test_seq, y_pred_bilstm))
mae_bilstm = mean_absolute_error(y_test_seq, y_pred_bilstm)
r2_bilstm = r2_score(y_test_seq, y_pred_bilstm)

print(f"\nüìä BI-LSTM RESULTS:")
print(f"   R¬≤ = {r2_bilstm:.4f}")
print(f"   RMSE = {rmse_bilstm:.2f}")
print(f"   MAE = {mae_bilstm:.2f}")
# Relative R² change versus the baseline LSTM, in percent of |baseline R²|
# NOTE(review): divides by abs(r2); a baseline R² of exactly 0 would raise here
print(f"\nüéØ Improvement: {((r2_bilstm - r2) / abs(r2)) * 100:+.2f}% R¬≤")

## 📊 Comparison

In [None]:
# Results DataFrame: one row per model with test metrics and training wall-clock time
results = pd.DataFrame([
    {'Model': 'LSTM', 'R¬≤': r2, 'RMSE': rmse, 'MAE': mae, 'Time': train_time},
    {'Model': 'Bi-LSTM', 'R¬≤': r2_bilstm, 'RMSE': rmse_bilstm, 'MAE': mae_bilstm, 'Time': train_time_bilstm}
])

print("\nüèÜ COMPARISON:")
print(results.to_string(index=False))

# Save (the results/metrics directory is expected to exist already)
results.to_csv(f'results/metrics/lstm_colab_{series_name}.csv', index=False)
print(f"\n‚úÖ Results saved to: results/metrics/lstm_colab_{series_name}.csv")

In [None]:
# Visual comparison: R² (left) and RMSE (right) of the two models
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

# Lower limit of the R² axis: keep the zoomed-in 0.85 when every score fits,
# otherwise extend below the weakest score so no bar is clipped out of view
r2_axis_floor = 0.85
r2_lowest = results['R¬≤'].min()
if np.isfinite(r2_lowest) and r2_lowest < r2_axis_floor:
    r2_axis_floor = r2_lowest - 0.05

# R² Comparison
axes[0].bar(results['Model'], results['R¬≤'], color=['#3498db', '#e74c3c'])
axes[0].set_ylabel('R¬≤ Score')
axes[0].set_title('R¬≤ Comparison', fontweight='bold')
axes[0].set_ylim([r2_axis_floor, 1.0])
axes[0].grid(alpha=0.3, axis='y')

# RMSE Comparison
axes[1].bar(results['Model'], results['RMSE'], color=['#3498db', '#e74c3c'])
axes[1].set_ylabel('RMSE')
axes[1].set_title('RMSE Comparison', fontweight='bold')
axes[1].grid(alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig(f'results/figures/lstm_colab_comparison_{series_name}.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"‚úÖ Plot saved to: results/figures/lstm_colab_comparison_{series_name}.png")

## 💾 Save Best Model

In [None]:
# Save best model: Bi-LSTM only if its test R² is strictly higher, otherwise the baseline LSTM
if r2_bilstm > r2:
    best_model, best_name = model_bilstm, 'Bi-LSTM'
else:
    best_model, best_name = model, 'LSTM'

best_model.save(f'models/lstm_best_{series_name}.keras')
print(f"‚úÖ Best model ({best_name}) saved to: models/lstm_best_{series_name}.keras")

## 🎨 Experiment 3: Generative Models

### Autoencoder für Time Series Forecasting
Autoencoders lernen kompakte Repräsentationen und können Anomalien erkennen.

## 🎯 Summary

**GPU Speedup Check:**
- If you see ~10-50x faster training vs CPU, GPU is working!
- Expected on CPU: ~100-200s per epoch
- Expected on GPU: ~2-5s per epoch

**Next Steps:**
1. Try longer sequences (48h, 168h)
2. Experiment with more units (128, 256)
3. Add Attention mechanism
4. Test on other time series (price, wind_offshore)

In [None]:
# Build LSTM Autoencoder
# Size of the bottleneck vector each input window is compressed to
latent_dim = 16

# Encoder: window -> two LSTM layers -> dense bottleneck of size latent_dim
encoder_inputs = keras.Input(shape=(seq_length, X_train.shape[1]))
x = layers.LSTM(64, return_sequences=True)(encoder_inputs)
x = layers.LSTM(32, return_sequences=False)(x)
encoder_outputs = layers.Dense(latent_dim, activation='relu')(x)

# Decoder: repeat the bottleneck vector once per time step and decode it back
# to a sequence with the same number of features as the input
decoder_inputs = layers.RepeatVector(seq_length)(encoder_outputs)
x = layers.LSTM(32, return_sequences=True)(decoder_inputs)
x = layers.LSTM(64, return_sequences=True)(x)
decoder_outputs = layers.TimeDistributed(layers.Dense(X_train.shape[1]))(x)

# Autoencoder: trained end to end with MSE between input and reconstruction
autoencoder = keras.Model(encoder_inputs, decoder_outputs, name='autoencoder')
autoencoder.compile(optimizer='adam', loss='mse')

print("üîß Autoencoder Architecture:")
autoencoder.summary()

In [None]:
# Train Autoencoder
# Input and target are the same windows: the model learns to reproduce its input.
# early_stop is the callback object created in the baseline training cell.
start = time.time()
history_ae = autoencoder.fit(
    X_train_seq, X_train_seq,  # Reconstruct input
    validation_data=(X_val_seq, X_val_seq),
    epochs=30,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1
)
train_time_ae = time.time() - start
print(f"\n‚úÖ Autoencoder training completed in {train_time_ae:.1f}s")

In [None]:
# Build forecasting model on top of encoder
# The encoder sub-model is cut out of the autoencoder graph (same input and bottleneck tensors)
encoder_model = keras.Model(encoder_inputs, encoder_outputs, name='encoder')

# Freeze encoder so only the new forecasting head is trained
for layer in encoder_model.layers:
    layer.trainable = False

# Add forecasting head: bottleneck vector -> dense -> dropout -> single output
forecast_inputs = keras.Input(shape=(seq_length, X_train.shape[1]))
encoded = encoder_model(forecast_inputs)
x = layers.Dense(32, activation='relu')(encoded)
x = layers.Dropout(0.2)(x)
forecast_output = layers.Dense(1)(x)

model_ae_forecast = keras.Model(forecast_inputs, forecast_output, name='ae_forecast')
model_ae_forecast.compile(optimizer='adam', loss='mse', metrics=['mae'])

print("üîß AE-Forecast Model:")
model_ae_forecast.summary()

In [None]:
# Train forecasting model
# Same data, epochs, batch size and callbacks as the LSTM runs
start = time.time()
history_ae_forecast = model_ae_forecast.fit(
    X_train_seq, y_train_seq,
    validation_data=(X_val_seq, y_val_seq),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)
# Only the forecasting-head training is timed here (autoencoder pre-training is in train_time_ae)
train_time_ae_forecast = time.time() - start

# Evaluate: predict in scaled space, then map back to the original target scale
y_pred_ae_scaled = model_ae_forecast.predict(X_test_seq).flatten()
y_pred_ae = scaler_y.inverse_transform(y_pred_ae_scaled.reshape(-1, 1)).flatten()

rmse_ae = np.sqrt(mean_squared_error(y_test_seq, y_pred_ae))
mae_ae = mean_absolute_error(y_test_seq, y_pred_ae)
r2_ae = r2_score(y_test_seq, y_pred_ae)

print(f"\nüìä AUTOENCODER-FORECAST RESULTS:")
print(f"   R¬≤ = {r2_ae:.4f}")
print(f"   RMSE = {rmse_ae:.2f}")
print(f"   MAE = {mae_ae:.2f}")
print(f"   Time = {train_time_ae_forecast:.1f}s")

### VAE (Variational Autoencoder)
VAE lernt probabilistische latente Repräsentationen - nützlich für Unsicherheitsquantifizierung.

In [None]:
# Sampling layer for VAE
class Sampling(layers.Layer):
    """Draw a latent vector from the Gaussian described by (z_mean, z_log_var).

    Implements the reparameterization ``z = mean + exp(0.5 * log_var) * eps``
    with ``eps`` sampled from a standard normal of shape (batch, dim).
    """

    def call(self, inputs):
        mean, log_var = inputs
        # Noise has the same (batch, dim) shape as the mean tensor
        noise_shape = (tf.shape(mean)[0], tf.shape(mean)[1])
        noise = tf.random.normal(shape=noise_shape)
        # exp(0.5 * log variance) is the standard deviation
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise

# VAE Encoder: window -> two LSTM layers -> mean and log-variance of the latent Gaussian
# (latent_dim is the value set in the autoencoder cell)
vae_inputs = keras.Input(shape=(seq_length, X_train.shape[1]))
x = layers.LSTM(64, return_sequences=True)(vae_inputs)
x = layers.LSTM(32)(x)
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)
# Sampled latent vector
z = Sampling()([z_mean, z_log_var])

# The encoder returns all three tensors; the VAE loss needs mean and log-variance too
vae_encoder = keras.Model(vae_inputs, [z_mean, z_log_var, z], name='vae_encoder')

# VAE Decoder: repeat the latent vector per time step and decode back to the feature space
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.RepeatVector(seq_length)(latent_inputs)
x = layers.LSTM(32, return_sequences=True)(x)
x = layers.LSTM(64, return_sequences=True)(x)
vae_outputs = layers.TimeDistributed(layers.Dense(X_train.shape[1]))(x)

vae_decoder = keras.Model(latent_inputs, vae_outputs, name='vae_decoder')

# VAE full model
class VAE(keras.Model):
    """Variational autoencoder combining an encoder and a decoder with a custom training step.

    Args:
        encoder: Model returning ``(z_mean, z_log_var, z)`` for a batch of inputs.
        decoder: Model mapping a latent batch ``z`` back to a reconstruction.
        **kwargs: Forwarded to ``keras.Model``.

    The training loss is the reconstruction loss plus the KL divergence term;
    all three values are tracked as running means and reported by ``fit``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        # Running means of the losses, shown in the fit() progress output
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers listed here are the ones Keras manages for this model."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step on a batch and return the running loss values.

        Args:
            data: The batch as delivered by ``fit``: either the input tensor
                itself or a tuple/list whose first element is the input
                (e.g. ``(x, y)`` when ``fit`` was called with a target).
        """
        # fit(x, y) hands over a tuple; the VAE is unsupervised and reconstructs
        # its own input, so only the first element is used. Passing the whole
        # tuple to the single-input encoder would fail.
        if isinstance(data, (tuple, list)):
            data = data[0]

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # Squared error reduced per time step, summed along axis 1, averaged over the batch
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.mean_squared_error(data, reconstruction), axis=1
                )
            )
            # KL divergence between the encoded Gaussian and a standard normal,
            # summed over latent dimensions and averaged over the batch
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

vae = VAE(vae_encoder, vae_decoder)
vae.compile(optimizer='adam')

print("üîß VAE built successfully!")

In [None]:
# Train VAE
# The VAE is unsupervised: its custom train_step reconstructs the batch it is
# given, so only the input windows are passed. Supplying a second (target)
# array makes Keras deliver an (x, y) tuple to train_step, which the
# single-input encoder cannot consume.
start = time.time()
history_vae = vae.fit(
    X_train_seq,
    epochs=30,
    batch_size=64,
    verbose=1
)
train_time_vae = time.time() - start
print(f"\n‚úÖ VAE training completed in {train_time_vae:.1f}s")

# Build forecasting model on VAE encoder
# Only the deterministic z_mean output is used as the encoding (no sampling)
vae_encoder_mean = keras.Model(vae_inputs, z_mean, name='vae_encoder_mean')

# Freeze the encoder layers so only the new forecasting head is trained
for layer in vae_encoder_mean.layers:
    layer.trainable = False

# Forecasting head: latent mean -> dense -> dropout -> single output
forecast_inputs_vae = keras.Input(shape=(seq_length, X_train.shape[1]))
encoded_vae = vae_encoder_mean(forecast_inputs_vae)
x = layers.Dense(32, activation='relu')(encoded_vae)
x = layers.Dropout(0.2)(x)
forecast_output_vae = layers.Dense(1)(x)

model_vae_forecast = keras.Model(forecast_inputs_vae, forecast_output_vae, name='vae_forecast')
model_vae_forecast.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train (same data, epochs, batch size and callbacks as the LSTM runs)
start = time.time()
history_vae_forecast = model_vae_forecast.fit(
    X_train_seq, y_train_seq,
    validation_data=(X_val_seq, y_val_seq),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)
# Only the forecasting-head training is timed here (VAE pre-training is in train_time_vae)
train_time_vae_forecast = time.time() - start

# Evaluate: predict in scaled space, then map back to the original target scale
y_pred_vae_scaled = model_vae_forecast.predict(X_test_seq).flatten()
y_pred_vae = scaler_y.inverse_transform(y_pred_vae_scaled.reshape(-1, 1)).flatten()

rmse_vae = np.sqrt(mean_squared_error(y_test_seq, y_pred_vae))
mae_vae = mean_absolute_error(y_test_seq, y_pred_vae)
r2_vae = r2_score(y_test_seq, y_pred_vae)

print(f"\nüìä VAE-FORECAST RESULTS:")
print(f"   R¬≤ = {r2_vae:.4f}")
print(f"   RMSE = {rmse_vae:.2f}")
print(f"   MAE = {mae_vae:.2f}")
print(f"   Time = {train_time_vae_forecast:.1f}s")

## ⚡ Experiment 4: Advanced Deep Learning Models

### N-BEATS (Neural Basis Expansion Analysis for Time Series)
State-of-the-art für univariate Zeitreihen - speziell designed für Forecasting!

In [None]:
# Install Darts (for N-BEATS, N-HiTS)
!pip install -q darts

In [None]:
# Import Darts
from darts import TimeSeries
from darts.models import NBEATSModel, NHiTSModel
from darts.dataprocessing.transformers import Scaler

print("‚úÖ Darts imported successfully!")

In [None]:
# Prepare data for Darts
def _index_by_timestamp(frame):
    """Return a copy of `frame` indexed by its parsed 'timestamp' column."""
    indexed = frame.copy()
    indexed['timestamp'] = pd.to_datetime(indexed['timestamp'])
    return indexed.set_index('timestamp')


train_df_ts = _index_by_timestamp(train_df)
val_df_ts = _index_by_timestamp(val_df)
test_df_ts = _index_by_timestamp(test_df)

# Create TimeSeries objects holding only the target column, at hourly frequency
ts_train = TimeSeries.from_dataframe(train_df_ts, value_cols=value_col, freq='H')
ts_val = TimeSeries.from_dataframe(val_df_ts, value_cols=value_col, freq='H')
ts_test = TimeSeries.from_dataframe(test_df_ts, value_cols=value_col, freq='H')

# Scale: the Darts scaler is fitted on the training series and reused for val and test
scaler_darts = Scaler()
ts_train_scaled = scaler_darts.fit_transform(ts_train)
ts_val_scaled = scaler_darts.transform(ts_val)
ts_test_scaled = scaler_darts.transform(ts_test)

print(f"‚úÖ Darts TimeSeries created: {len(ts_train)} train samples")

In [None]:
# Build N-BEATS model
model_nbeats = NBEATSModel(
    input_chunk_length=168,  # 1 week lookback
    output_chunk_length=24,  # 1 day forecast
    num_stacks=30,
    num_blocks=1,
    num_layers=4,
    layer_widths=256,
    expansion_coefficient_dim=5,
    n_epochs=50,
    batch_size=64,
    optimizer_kwargs={'lr': 1e-3},
    pl_trainer_kwargs={
        # 'auto' uses the GPU when one is present and falls back to CPU otherwise,
        # instead of failing outright on a runtime without GPU
        'accelerator': 'auto',
        'devices': 1,
        'enable_progress_bar': True,
    },
    # Name the run (and its checkpoint folder) after the selected series,
    # so runs for different series are not all labelled 'solar'
    model_name=f'nbeats_{series_name}',
    force_reset=True,
    save_checkpoints=True
)

print("üîß N-BEATS model configured!")

In [None]:
# Train N-BEATS
print("üöÄ Training N-BEATS (this will be FAST on GPU!)...")
start = time.time()

# Fit on the scaled training series; the scaled validation series is passed as val_series
model_nbeats.fit(
    series=ts_train_scaled,
    val_series=ts_val_scaled,
    verbose=True
)

# Wall-clock training time, reported in the model comparison
train_time_nbeats = time.time() - start
print(f"\n‚úÖ N-BEATS training completed in {train_time_nbeats:.1f}s ({train_time_nbeats/60:.1f} min)")

In [None]:
# Predict with N-BEATS
n_pred = len(ts_test)

# The forecast starts right after the end of the series given to predict().
# Forecasting from the training series would cover the validation period while
# being scored against the test values, so use whichever known series ends last.
# NOTE(review): assumes the splits are chronological (train -> val -> test) and
# contiguous — confirm against the preprocessing.
if ts_val_scaled.end_time() > ts_train_scaled.end_time():
    history_scaled = ts_val_scaled
else:
    history_scaled = ts_train_scaled

pred_nbeats_scaled = model_nbeats.predict(n=n_pred, series=history_scaled)
# Back to the original target scale
pred_nbeats = scaler_darts.inverse_transform(pred_nbeats_scaled)

# Extract values
y_pred_nbeats = pred_nbeats.values().flatten()
y_test_nbeats = ts_test.values().flatten()

# Ensure same length
min_len = min(len(y_pred_nbeats), len(y_test_nbeats))
y_pred_nbeats = y_pred_nbeats[:min_len]
y_test_nbeats = y_test_nbeats[:min_len]

# Evaluate
rmse_nbeats = np.sqrt(mean_squared_error(y_test_nbeats, y_pred_nbeats))
mae_nbeats = mean_absolute_error(y_test_nbeats, y_pred_nbeats)
r2_nbeats = r2_score(y_test_nbeats, y_pred_nbeats)

print(f"\nüìä N-BEATS RESULTS:")
print(f"   R¬≤ = {r2_nbeats:.4f}")
print(f"   RMSE = {rmse_nbeats:.2f}")
print(f"   MAE = {mae_nbeats:.2f}")
print(f"   Time = {train_time_nbeats:.1f}s")

### N-HiTS (Neural Hierarchical Interpolation for Time Series)
Verbesserte Version von N-BEATS - noch besser für lange Horizonte!

In [None]:
# Build N-HiTS model
model_nhits = NHiTSModel(
    input_chunk_length=168,  # 1 week lookback
    output_chunk_length=24,  # 1 day forecast
    num_stacks=3,
    num_blocks=1,
    num_layers=2,
    layer_widths=512,
    n_epochs=50,
    batch_size=64,
    optimizer_kwargs={'lr': 1e-3},
    pl_trainer_kwargs={
        # 'auto' uses the GPU when one is present and falls back to CPU otherwise,
        # instead of failing outright on a runtime without GPU
        'accelerator': 'auto',
        'devices': 1,
        'enable_progress_bar': True,
    },
    # Name the run (and its checkpoint folder) after the selected series,
    # so runs for different series are not all labelled 'solar'
    model_name=f'nhits_{series_name}',
    force_reset=True,
    save_checkpoints=True
)

print("üîß N-HiTS model configured!")

In [None]:
# Train N-HiTS
print("üöÄ Training N-HiTS...")
start = time.time()

# Fit on the scaled training series; the scaled validation series is passed as val_series
model_nhits.fit(
    series=ts_train_scaled,
    val_series=ts_val_scaled,
    verbose=True
)

# Wall-clock training time, reported in the model comparison
train_time_nhits = time.time() - start
print(f"\n‚úÖ N-HiTS training completed in {train_time_nhits:.1f}s ({train_time_nhits/60:.1f} min)")

In [None]:
# Predict with N-HiTS
# Computed here as well so this cell does not depend on the N-BEATS prediction cell
n_pred = len(ts_test)

# The forecast starts right after the end of the series given to predict().
# Forecasting from the training series would cover the validation period while
# being scored against the test values, so use whichever known series ends last.
# NOTE(review): assumes the splits are chronological (train -> val -> test) and
# contiguous — confirm against the preprocessing.
if ts_val_scaled.end_time() > ts_train_scaled.end_time():
    history_scaled = ts_val_scaled
else:
    history_scaled = ts_train_scaled

pred_nhits_scaled = model_nhits.predict(n=n_pred, series=history_scaled)
# Back to the original target scale
pred_nhits = scaler_darts.inverse_transform(pred_nhits_scaled)

# Extract values
y_pred_nhits = pred_nhits.values().flatten()
y_test_nhits = ts_test.values().flatten()

# Ensure same length
min_len = min(len(y_pred_nhits), len(y_test_nhits))
y_pred_nhits = y_pred_nhits[:min_len]
y_test_nhits = y_test_nhits[:min_len]

# Evaluate
rmse_nhits = np.sqrt(mean_squared_error(y_test_nhits, y_pred_nhits))
mae_nhits = mean_absolute_error(y_test_nhits, y_pred_nhits)
r2_nhits = r2_score(y_test_nhits, y_pred_nhits)

print(f"\nüìä N-HiTS RESULTS:")
print(f"   R¬≤ = {r2_nhits:.4f}")
print(f"   RMSE = {rmse_nhits:.2f}")
print(f"   MAE = {mae_nhits:.2f}")
print(f"   Time = {train_time_nhits:.1f}s")

## 📊 Final Comparison - All Models

In [None]:
# Comprehensive results: one row per model with its category, test metrics and training time
# NOTE(review): 'Time' for Autoencoder and VAE is the forecasting-head training only;
# the pre-training times (train_time_ae, train_time_vae) are not included — confirm this is intended
all_results = pd.DataFrame([
    {'Model': 'LSTM', 'Category': 'Basic DL', 'R¬≤': r2, 'RMSE': rmse, 'MAE': mae, 'Time': train_time},
    {'Model': 'Bi-LSTM', 'Category': 'Basic DL', 'R¬≤': r2_bilstm, 'RMSE': rmse_bilstm, 'MAE': mae_bilstm, 'Time': train_time_bilstm},
    {'Model': 'Autoencoder', 'Category': 'Generative', 'R¬≤': r2_ae, 'RMSE': rmse_ae, 'MAE': mae_ae, 'Time': train_time_ae_forecast},
    {'Model': 'VAE', 'Category': 'Generative', 'R¬≤': r2_vae, 'RMSE': rmse_vae, 'MAE': mae_vae, 'Time': train_time_vae_forecast},
    {'Model': 'N-BEATS', 'Category': 'Advanced', 'R¬≤': r2_nbeats, 'RMSE': rmse_nbeats, 'MAE': mae_nbeats, 'Time': train_time_nbeats},
    {'Model': 'N-HiTS', 'Category': 'Advanced', 'R¬≤': r2_nhits, 'RMSE': rmse_nhits, 'MAE': mae_nhits, 'Time': train_time_nhits}
])

# Sort by R² (best model first)
all_results = all_results.sort_values('R¬≤', ascending=False)

print("\n" + "="*80)
print("üèÜ COMPREHENSIVE MODEL COMPARISON")
print("="*80)
print("\n" + all_results.to_string(index=False))

# Identify best: first row after the descending sort
best = all_results.iloc[0]
print(f"\nü•á BEST MODEL: {best['Model']} ({best['Category']})")
print(f"   R¬≤ = {best['R¬≤']:.4f}")
print(f"   RMSE = {best['RMSE']:.2f}")
print(f"   MAE = {best['MAE']:.2f}")
print(f"   Training Time = {best['Time']:.1f}s")

# Save results (the results/metrics directory is expected to exist already)
all_results.to_csv(f'results/metrics/deep_learning_comprehensive_{series_name}.csv', index=False)
print(f"\n‚úÖ Results saved to: results/metrics/deep_learning_comprehensive_{series_name}.csv")

In [None]:
# Comprehensive visualization: R², RMSE and training time per model, plus mean R² per category
def _r2_axis_floor(scores, default=0.85, margin=0.05):
    """Return the lower limit for an R² axis.

    Keeps the zoomed-in `default` when every score is at least that high;
    otherwise returns the lowest score minus `margin`, so that low or negative
    scores are not clipped out of the plot.
    """
    lowest = scores.min()
    if np.isfinite(lowest) and lowest < default:
        return lowest - margin
    return default


fig, axes = plt.subplots(2, 2, figsize=(18, 12))

# R² by Model
axes[0, 0].barh(all_results['Model'], all_results['R¬≤'],
                color=['#e74c3c', '#3498db', '#2ecc71', '#f39c12', '#9b59b6', '#1abc9c'])
axes[0, 0].set_xlabel('R¬≤ Score', fontweight='bold')
axes[0, 0].set_title('R¬≤ Score by Model', fontweight='bold', fontsize=12)
axes[0, 0].set_xlim([_r2_axis_floor(all_results['R¬≤']), 1.0])
axes[0, 0].grid(alpha=0.3, axis='x')

# RMSE by Model
axes[0, 1].barh(all_results['Model'], all_results['RMSE'],
                color=['#e74c3c', '#3498db', '#2ecc71', '#f39c12', '#9b59b6', '#1abc9c'])
axes[0, 1].set_xlabel('RMSE', fontweight='bold')
axes[0, 1].set_title('RMSE by Model', fontweight='bold', fontsize=12)
axes[0, 1].grid(alpha=0.3, axis='x')

# Training Time by Model
axes[1, 0].barh(all_results['Model'], all_results['Time'],
                color=['#e74c3c', '#3498db', '#2ecc71', '#f39c12', '#9b59b6', '#1abc9c'])
axes[1, 0].set_xlabel('Training Time (seconds)', fontweight='bold')
axes[1, 0].set_title('Training Time by Model', fontweight='bold', fontsize=12)
axes[1, 0].grid(alpha=0.3, axis='x')

# R² by Category (mean over the models of each category, best first)
category_r2 = all_results.groupby('Category')['R¬≤'].mean().sort_values(ascending=False)
axes[1, 1].bar(category_r2.index, category_r2.values,
               color=['#9b59b6', '#2ecc71', '#3498db'])
axes[1, 1].set_ylabel('Average R¬≤', fontweight='bold')
axes[1, 1].set_title('Average R¬≤ by Category', fontweight='bold', fontsize=12)
axes[1, 1].set_ylim([_r2_axis_floor(category_r2), 1.0])
axes[1, 1].grid(alpha=0.3, axis='y')
axes[1, 1].tick_params(axis='x', rotation=15)

plt.tight_layout()
plt.savefig(f'results/figures/deep_learning_comprehensive_{series_name}.png',
            dpi=150, bbox_inches='tight')
plt.show()

print(f"‚úÖ Comprehensive plot saved to: results/figures/deep_learning_comprehensive_{series_name}.png")

In [None]:
# Prediction comparison plot: one panel per model, prediction against the matching actual values
fig, axes = plt.subplots(3, 2, figsize=(20, 15))
axes = axes.flatten()

# Show at most the first 500 predicted points per model for clarity
n_show = 500

# (name, predictions, actual values aligned with them, test row of the first prediction)
# The Keras models predict from test row `seq_length` onwards (y_test_seq), whereas the
# Darts models forecast from the first test row (y_test_nbeats / y_test_nhits). Plotting
# every model against y_test_seq would shift the Darts curves by seq_length hours.
models_data = [
    ('LSTM', y_pred, y_test_seq, seq_length),
    ('Bi-LSTM', y_pred_bilstm, y_test_seq, seq_length),
    ('Autoencoder', y_pred_ae, y_test_seq, seq_length),
    ('VAE', y_pred_vae, y_test_seq, seq_length),
    ('N-BEATS', y_pred_nbeats, y_test_nbeats, 0),
    ('N-HiTS', y_pred_nhits, y_test_nhits, 0)
]
model_colors = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12', '#9b59b6', '#1abc9c']

for idx, (model_name, predictions, actual, first_row) in enumerate(models_data):
    # Never plot more points than are available, so x and y always have equal length
    n_points = min(n_show, len(predictions), len(actual))
    # x axis is the position inside the test set, identical across all panels
    x_plot = range(first_row, first_row + n_points)

    axes[idx].plot(x_plot, actual[:n_points], label='Actual', color='black', linewidth=1, alpha=0.7)
    axes[idx].plot(x_plot, predictions[:n_points], label=f'{model_name} Prediction',
                   color=model_colors[idx],
                   linewidth=1, alpha=0.8)

    model_r2 = all_results[all_results['Model'] == model_name]['R¬≤'].values[0]
    axes[idx].set_title(f'{model_name} (R¬≤ = {model_r2:.4f})', fontweight='bold', fontsize=11)
    axes[idx].set_xlabel('Time (hours)', fontsize=9)
    axes[idx].set_ylabel(f'{series_name.title()} (MW)', fontsize=9)
    axes[idx].legend(loc='upper right', fontsize=8)
    axes[idx].grid(alpha=0.3)

plt.suptitle(f'Prediction Comparison - All Models ({series_name.title()})',
             fontweight='bold', fontsize=14, y=1.0)
plt.tight_layout()
plt.savefig(f'results/figures/predictions_comparison_{series_name}.png',
            dpi=150, bbox_inches='tight')
plt.show()

print(f"‚úÖ Predictions plot saved to: results/figures/predictions_comparison_{series_name}.png")

## 🎯 Key Insights

### GPU Performance Gains:
- **LSTM/Bi-LSTM**: ~30-50x faster on GPU vs CPU
- **N-BEATS/N-HiTS**: Only feasible on GPU (100+ epochs in minutes)
- **Generative Models**: 10-20x speedup

### Model Performance Rankings:
1. **Advanced Models** (N-BEATS, N-HiTS) - Often best for complex patterns
2. **Basic DL** (LSTM, Bi-LSTM) - Good baseline, interpretable
3. **Generative** (AE, VAE) - Useful for anomaly detection, uncertainty quantification

### When to Use What:
- **N-BEATS/N-HiTS**: Best for pure forecasting accuracy
- **Bi-LSTM**: Good balance of performance and interpretability
- **VAE**: When you need uncertainty estimates
- **Autoencoder**: For anomaly detection alongside forecasting

### Production Recommendations:
- **High Accuracy Needed**: N-BEATS or N-HiTS
- **Real-time Inference**: Bi-LSTM (faster inference)
- **Interpretability**: Standard LSTM with attention
- **Anomaly Detection**: VAE or Autoencoder

---

## 🔄 Batch-Verarbeitung: Alle Zeitreihen

Falls `RUN_ALL_SERIES = True` gesetzt wurde, wird folgender Code alle Zeitreihen nacheinander verarbeiten.

In [None]:
if RUN_ALL_SERIES and len(series_to_process) > 1:
    print("="*80)
    print("üîÑ BATCH MODE: Processing all time series")
    print("="*80)
    
    # Storage for all results
    batch_results = []
    
    for idx, current_series in enumerate(series_to_process, 1):
        print(f"\n{'='*80}")
        print(f"üìä Processing {idx}/{len(series_to_process)}: {current_series.upper()}")
        print(f"{'='*80}\n")
        
        try:
            # Load data
            train_df = pd.read_csv(f'data/processed/{current_series}_train.csv')
            val_df = pd.read_csv(f'data/processed/{current_series}_val.csv')
            test_df = pd.read_csv(f'data/processed/{current_series}_test.csv')
            
            # Determine value column
            value_col = [c for c in train_df.columns if c in ['solar', 'price', 'value', 
                                                                 'wind_offshore', 'wind_onshore', 'consumption']][0]
            feature_cols = [c for c in train_df.columns if c not in ['timestamp', value_col]]
            
            print(f"‚úÖ Data loaded: {len(train_df)} train, {len(val_df)} val, {len(test_df)} test")
            print(f"   Value column: {value_col}, Features: {len(feature_cols)}")
            
            # Scale data
            scaler_X = StandardScaler()
            scaler_y = StandardScaler()
            
            X_train = scaler_X.fit_transform(train_df[feature_cols])
            y_train = scaler_y.fit_transform(train_df[[value_col]])
            
            X_val = scaler_X.transform(val_df[feature_cols])
            y_val = scaler_y.transform(val_df[[value_col]])
            
            X_test = scaler_X.transform(test_df[feature_cols])
            y_test_orig = test_df[value_col].values
            
            # Create sequences
            seq_length = 24
            X_train_seq, y_train_seq = create_sequences(X_train, y_train.flatten(), seq_length)
            X_val_seq, y_val_seq = create_sequences(X_val, y_val.flatten(), seq_length)
            X_test_seq, _ = create_sequences(X_test, np.zeros(len(X_test)), seq_length)
            y_test_seq = y_test_orig[seq_length:]
            
            print(f"‚úÖ Sequences created: {X_train_seq.shape}")
            
            # Train only key models (faster for batch); this list collects one
            # metrics row per model for the current series.
            series_results = []

            # 1. Bi-LSTM
            print("\nüîß Training Bi-LSTM...")
            n_features = X_train.shape[1]
            bilstm_layers = [
                # Two stacked bidirectional LSTM layers, each followed by dropout.
                layers.Bidirectional(
                    layers.LSTM(64, return_sequences=True),
                    input_shape=(seq_length, n_features),
                ),
                layers.Dropout(0.2),
                layers.Bidirectional(layers.LSTM(32)),
                layers.Dropout(0.2),
                # Dense head producing a single output value.
                layers.Dense(32, activation='relu'),
                layers.Dense(1),
            ]
            model_bilstm = keras.Sequential(bilstm_layers)
            model_bilstm.compile(optimizer='adam', loss='mse', metrics=['mae'])

            # Time only the fit call.
            start = time.time()
            model_bilstm.fit(
                X_train_seq,
                y_train_seq,
                validation_data=(X_val_seq, y_val_seq),
                epochs=30,
                batch_size=64,
                callbacks=[early_stop],
                verbose=0,
            )
            train_time = time.time() - start

            # Predictions come out in scaled space; undo the target scaling so
            # the metrics are computed against the unscaled test targets.
            y_pred_scaled = model_bilstm.predict(X_test_seq, verbose=0).flatten()
            y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

            r2 = r2_score(y_test_seq, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test_seq, y_pred))
            mae = mean_absolute_error(y_test_seq, y_pred)

            bilstm_row = {
                'Series': current_series,
                'Model': 'Bi-LSTM',
                'R¬≤': r2,
                'RMSE': rmse,
                'MAE': mae,
                'Time': train_time,
            }
            series_results.append(bilstm_row)
            print(f"   ‚úÖ Bi-LSTM: R¬≤={r2:.4f}, RMSE={rmse:.2f}, Time={train_time:.1f}s")
            
            # 2. N-BEATS (if Darts available)
            # Best-effort step: any failure inside the try is reported by the
            # handler at the bottom and the series keeps the rows collected so far.
            try:
                print("\nüîß Training N-BEATS...")

                # Build one hourly Darts series per split from the target column,
                # indexed by the parsed 'timestamp' column.
                hourly_series = []
                for split_df in (train_df, val_df, test_df):
                    indexed_df = split_df.copy()
                    indexed_df['timestamp'] = pd.to_datetime(indexed_df['timestamp'])
                    indexed_df = indexed_df.set_index('timestamp')
                    hourly_series.append(
                        TimeSeries.from_dataframe(indexed_df, value_cols=value_col, freq='H')
                    )
                ts_train, ts_val, ts_test = hourly_series

                # The scaler is fitted on the training series only and reused
                # for the validation series.
                scaler_darts = Scaler()
                ts_train_scaled = scaler_darts.fit_transform(ts_train)
                ts_val_scaled = scaler_darts.transform(ts_val)

                input_chunk_length = 168
                model_nbeats = NBEATSModel(
                    input_chunk_length=input_chunk_length, output_chunk_length=24,
                    num_stacks=20, num_blocks=1, num_layers=3, layer_widths=128,
                    n_epochs=30, batch_size=64,
                    pl_trainer_kwargs={'accelerator': 'gpu', 'devices': 1, 'enable_progress_bar': False},
                    model_name=f'nbeats_{current_series}',
                    force_reset=True, save_checkpoints=False
                )

                # Time only the fit call.
                start = time.time()
                model_nbeats.fit(series=ts_train_scaled, val_series=ts_val_scaled, verbose=False)
                train_time_nbeats = time.time() - start

                # The forecast has to start where the test split starts.
                # Forecasting from the end of the *training* series yields values
                # for the period right after training (the validation window when
                # the split is train -> val -> test) and would score them against
                # test targets. Forecast from the end of the validation window
                # instead; the validation series alone is used as history when it
                # is long enough to fill one input chunk.
                # NOTE(review): assumes the three splits are chronological and
                # contiguous - confirm against the split code.
                if len(ts_val_scaled) >= input_chunk_length:
                    ts_history_scaled = ts_val_scaled
                else:
                    ts_history_scaled = ts_train_scaled.append(ts_val_scaled)

                pred_nbeats_scaled = model_nbeats.predict(n=len(ts_test), series=ts_history_scaled)
                pred_nbeats = scaler_darts.inverse_transform(pred_nbeats_scaled)

                # Compare position by position, truncated to the shorter array.
                y_pred_nbeats = pred_nbeats.values().flatten()
                y_test_nbeats = ts_test.values().flatten()
                min_len = min(len(y_pred_nbeats), len(y_test_nbeats))
                y_true_cut = y_test_nbeats[:min_len]
                y_pred_cut = y_pred_nbeats[:min_len]

                r2_nbeats = r2_score(y_true_cut, y_pred_cut)
                rmse_nbeats = np.sqrt(mean_squared_error(y_true_cut, y_pred_cut))
                mae_nbeats = mean_absolute_error(y_true_cut, y_pred_cut)

                series_results.append({
                    'Series': current_series,
                    'Model': 'N-BEATS',
                    'R¬≤': r2_nbeats,
                    'RMSE': rmse_nbeats,
                    'MAE': mae_nbeats,
                    'Time': train_time_nbeats
                })
                print(f"   ‚úÖ N-BEATS: R¬≤={r2_nbeats:.4f}, RMSE={rmse_nbeats:.2f}, Time={train_time_nbeats:.1f}s")

            except Exception as e:
                # Deliberately broad: N-BEATS is optional, so any error only
                # skips this model for the current series.
                print(f"   ‚ö†Ô∏è N-BEATS skipped: {e}")
            
            # Add to batch results: append this series' metric rows to the
            # list that accumulates rows across all processed series.
            batch_results.extend(series_results)
            
            print(f"\n‚úÖ {current_series.upper()} completed!\n")
            
        except Exception as e:
            # Any error while processing this series is reported and the loop
            # moves on to the next series instead of aborting the whole batch.
            print(f"‚ùå Error processing {current_series}: {e}")
            continue
    
    # Summary: print the collected metrics, save them as CSV and plot a
    # per-series comparison of R¬≤ and training time for each model.
    from pathlib import Path  # local import: only needed to create output folders

    print("\n" + "="*80)
    print("üèÜ BATCH PROCESSING COMPLETE")
    print("="*80)

    batch_df = pd.DataFrame(batch_results)

    if batch_df.empty:
        # No series produced a result row, so the frame has no 'Series' /
        # 'Model' columns and the pivots below would fail with a KeyError.
        print("\nNo batch results were collected - nothing to save or plot.")
    else:
        print("\n" + batch_df.to_string(index=False))

        # Save batch results (create the target folder first so a fresh
        # checkout without a results/ tree does not fail here).
        metrics_path = 'results/metrics/batch_all_series_gpu.csv'
        Path(metrics_path).parent.mkdir(parents=True, exist_ok=True)
        batch_df.to_csv(metrics_path, index=False)
        print(f"\n‚úÖ Batch results saved to: {metrics_path}")

        # Visualization: comparison across series, one panel per metric.
        figure_path = 'results/figures/batch_all_series_comparison.png'
        Path(figure_path).parent.mkdir(parents=True, exist_ok=True)

        fig, axes = plt.subplots(1, 2, figsize=(16, 6))
        panels = (
            # (metric column, y-axis label, panel title)
            ('R¬≤', 'R¬≤ Score', 'R¬≤ Score by Series and Model'),
            ('Time', 'Training Time (s)', 'Training Time by Series and Model'),
        )
        for ax, (metric_col, y_label, panel_title) in zip(axes, panels):
            # One row per series, one column per model; a model that was
            # skipped for a series shows up as a missing value.
            pivot_df = batch_df.pivot(index='Series', columns='Model', values=metric_col)
            pivot_df.plot(kind='bar', ax=ax, color=['#3498db', '#e74c3c'])
            ax.set_ylabel(y_label, fontweight='bold')
            ax.set_title(panel_title, fontweight='bold', fontsize=12)
            ax.legend(title='Model')
            ax.grid(alpha=0.3, axis='y')
            ax.tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.savefig(figure_path, dpi=150, bbox_inches='tight')
        plt.show()

        print(f"‚úÖ Batch comparison plot saved!")
    
else:
    # Batch branch not taken: only print a hint on how to enable batch mode.
    # NOTE(review): the matching `if` is outside this view - presumably
    # `if RUN_ALL_SERIES:`; confirm.
    print("\nüí° Einzelne Zeitreihe wurde verarbeitet.")
    print("   Setze RUN_ALL_SERIES = True f√ºr Batch-Verarbeitung.")

---

## 📝 Anleitung: Zeitreihe wechseln

### ✅ Methode 1: Einzelne Zeitreihe
```python
# In der Konfigurations-Zelle (Zelle 7):
SERIES_NAME = 'wind_offshore'  # Ändere hier die Zeitreihe
RUN_ALL_SERIES = False

# Dann: Restart Runtime + Run All
```

### 🔄 Methode 2: Alle Zeitreihen
```python
# In der Konfigurations-Zelle (Zelle 7):
RUN_ALL_SERIES = True  # Batch-Modus aktivieren

# Dann: Restart Runtime + Run All
# → Verarbeitet automatisch alle 5 Zeitreihen nacheinander
# → Erstellt Vergleichstabelle und Plots
# → Dauert ca. 20-40 Min für alle Serien
```

### 📊 Output-Dateien:
- Einzeln: `results/metrics/deep_learning_comprehensive_{series_name}.csv`
- Batch: `results/metrics/batch_all_series_gpu.csv`
- Plots: `results/figures/`

### 💡 Empfehlung:
- **Erste Experimente**: Einzelne Zeitreihe (schneller)
- **Finale Evaluation**: Batch-Modus (systematischer Vergleich)