# 🌬️ LSTM-based Wind Power Forecasting (DK1)

Predict wind power output from the previous 24 hourly steps of weather features (wind speed, wind direction, temperature, surface pressure, relative humidity) using an LSTM.

In [1]:
# 1. Imports

import os

import joblib
import matplotlib.pyplot as pl  # original alias, kept so any existing `pl.` references keep working
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

2025-04-05 15:44:31.255660: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# 1. Load dataset
# Read the 2016 training data and the wind power series, parsing each file's
# timestamp column as datetimes.
df = pd.read_csv("data/merged_training_data_2016.csv", parse_dates=["timestamp"])
df_wind = pd.read_csv("data/wind_power.csv", parse_dates=["utc_timestamp"])

# Drop timezone information from both timestamp columns and give the wind
# frame the same key name ("timestamp") so the two frames can be joined on it.
df = df.assign(timestamp=df["timestamp"].dt.tz_localize(None))
df_wind = df_wind.assign(
    utc_timestamp=df_wind["utc_timestamp"].dt.tz_localize(None)
).rename(columns={"utc_timestamp": "timestamp"})

# pd.merge defaults to an inner join, so only timestamps present in both
# frames survive. The last column of the merged frame is then labelled as the
# target (presumably the power reading from wind_power.csv -- verify the CSV
# has a single value column).
df = pd.merge(df, df_wind, on="timestamp")
df = df.rename(columns={df.columns[-1]: "wind_power_actual"})

# 2. Select features
features = [
    "wind_power_actual",     # target -- must stay in column 0 (create_sequences relies on it)
    "windspeed_10m",
    "winddirection_10m",
    "temperature_2m",
    "surface_pressure",
    "relativehumidity_2m"
]

df = df[features].dropna()

# Window length used when building sequences in step 4.
lookback = 24

# Fraction of sequences used for training; keep in sync with the split in step 5.
TRAIN_FRACTION = 0.8

# 3. Normalize features
# Fit the scaler on the training rows only. Fitting on the full frame would
# leak the min/max of the held-out test period into training.
# Sequence i reads rows i .. i+lookback (inclusive of its label row), so the
# first `n_train_sequences` sequences touch exactly the first
# `n_train_sequences + lookback` rows.
n_sequences = len(df) - lookback
if n_sequences <= 0:
    raise ValueError(
        f"Need more than {lookback} rows to build sequences, got {len(df)}"
    )
n_train_rows = int(n_sequences * TRAIN_FRACTION) + lookback

scaler = MinMaxScaler()
scaler.fit(df.iloc[:n_train_rows])
# Test-period values may fall slightly outside [0, 1]; that is expected.
data_scaled = scaler.transform(df)

# Persist the column order so inference code can rebuild the same layout.
feature_columns = df.columns.tolist()
os.makedirs("models", exist_ok=True)   # joblib.dump does not create directories
joblib.dump(feature_columns, "models/feature_columns.pkl")

# 4. Create sequences (window length `lookback` is defined above)

def create_sequences(data, lookback):
    """Cut a 2-D array into sliding input windows and next-step labels.

    For every start row ``s`` the input window is rows ``s .. s+lookback-1``
    of all columns except column 0, and the label is column 0 of row
    ``s+lookback``.

    Args:
        data: 2-D array whose column 0 is the target and whose remaining
            columns are the input features.
        lookback: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. With ``n = len(data) - lookback``
        windows, ``X`` has shape ``(n, lookback, n_columns - 1)`` and ``y``
        has shape ``(n,)``. When no window fits, both are empty 1-D arrays.
    """
    starts = range(len(data) - lookback)
    windows = [data[s:s + lookback, 1:] for s in starts]   # column 0 (target) excluded
    labels = [data[s + lookback, 0] for s in starts]
    return np.array(windows), np.array(labels)

X, y = create_sequences(data_scaled, lookback)

# 5. Train/test split
# Chronological split without shuffling: the first 80% of the windows are
# used for training and the remaining 20% are held out for testing.
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# 6. Build LSTM model
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable across Restart & Run All.
SEED = 42
set_random_seed(SEED)

# An explicit Input layer replaces the `input_shape=` argument on the first
# layer, which Keras 3 deprecates for Sequential models.
model = Sequential([
    Input(shape=(X.shape[1], X.shape[2])),   # (lookback, number of input features)
    LSTM(64),
    Dense(1),                                # single scaled power value
])
model.compile(optimizer="adam", loss="mse")

# 7. Train
# EarlyStopping monitors val_loss by default. Training stops after 10 epochs
# without improvement and the best weights seen are restored.
es = EarlyStopping(patience=10, restore_best_weights=True)
# validation_split holds out the last 20% of the training windows, taken
# before shuffling, so validation data is the most recent part of training.
history = model.fit(X_train, y_train, epochs=100, batch_size=16,
                    validation_split=0.2, callbacks=[es], verbose=1)

# 8. Predict
y_pred = model.predict(X_test)

# 9. Inverse transform target
def _unscale_target(scaled_values):
    """Map scaled target values back to original units.

    The scaler was fitted on every column, so it only accepts arrays of that
    width. The values are placed in column 0 (the target) of a zero-filled
    array, inverse-transformed, and column 0 is read back.
    """
    padded = np.zeros((len(scaled_values), data_scaled.shape[1]))
    padded[:, 0] = scaled_values
    return scaler.inverse_transform(padded)[:, 0]

y_pred_inv = _unscale_target(y_pred[:, 0])
y_test_inv = _unscale_target(y_test)


In [None]:
# 10. Evaluate
# Both metrics are computed on the inverse-transformed (original-unit) values.
mse = mean_squared_error(y_test_inv, y_pred_inv)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_inv, y_pred_inv)

print(f"LSTM RMSE: {rmse:.2f} MW")
print(f"LSTM R² Score: {r2:.2f}")

# 11. Plot
# Compare actual and predicted power for the first 200 test samples.
# Uses the explicit figure/axes interface; `plt` is matplotlib.pyplot as
# imported in the imports cell.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y_test_inv[:200], label="Actual")
ax.plot(y_pred_inv[:200], label="LSTM Predicted")
ax.set_title("LSTM Wind Power Prediction (first 200 hours)")
ax.set_xlabel("Time Index")
ax.set_ylabel("Power [MW]")
ax.legend()
fig.tight_layout()

# Save before showing: some backends clear the figure once it is shown.
os.makedirs("plots", exist_ok=True)
fig.savefig("plots/LSTM_MultiFeature_Prediction.png")
plt.show()

# 12. Save model & scaler
# Create the output directory first, as is done for "plots"; neither
# model.save nor joblib.dump creates missing parent directories.
os.makedirs("models", exist_ok=True)

model.save("models/lstm_model_multifeature.keras")
# The scaler is stored with the model so predictions can be mapped back to
# original units at inference time.
joblib.dump(scaler, "models/scaler_lstm_multifeature.pkl")