In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
import time

In [2]:
df = pd.read_csv('scada.csv')

In [3]:
df['Date/Time'] = pd.to_datetime(df['Date/Time'], format='%d %m %Y %H:%M', errors='coerce')
df.set_index('Date/Time', inplace=True)
df = df.sort_index()
df.dropna(inplace=True)

In [4]:
features = ['Wind Speed (m/s)', 'Wind Direction (°)', 'Theoretical_Power_Curve (KWh)']
target = 'LV ActivePower (kW)'

In [5]:
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

In [6]:
scaled_features = scaler_x.fit_transform(df[features])
scaled_target = scaler_y.fit_transform(df[[target]])

In [7]:
def create_sequences(features, target, time_steps=24):
    X, y = [], []
    for i in range(len(features) - time_steps):
        X.append(features[i:i+time_steps])
        y.append(target[i+time_steps])
    return np.array(X), np.array(y)

time_steps = 24  
X, y = create_sequences(scaled_features, scaled_target, time_steps)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),  # Capa de entrada explícita
    LSTM(128, activation='tanh', return_sequences=True),
    Dropout(0.3),
    LSTM(64, activation='tanh'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)
])


In [17]:
model.compile(optimizer=Adam(learning_rate=0.0001), loss='mse', metrics=['mae'])

In [19]:
start_time = time.time()

In [None]:
history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32, verbose=1)

Epoch 1/50
[1m1011/1011[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 15ms/step - loss: 0.0460 - mae: 0.1404 - val_loss: 0.0218 - val_mae: 0.0950
Epoch 2/50
[1m1011/1011[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15ms/step - loss: 0.0238 - mae: 0.0970 - val_loss: 0.0193 - val_mae: 0.0839
Epoch 3/50
[1m1011/1011[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15ms/step - loss: 0.0221 - mae: 0.0906 - val_loss: 0.0181 - val_mae: 0.0775
Epoch 4/50
[1m1011/1011[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15ms/step - loss: 0.0211 - mae: 0.0875 - val_loss: 0.0174 - val_mae: 0.0764
Epoch 5/50
[1m1011/1011[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 16ms/step - loss: 0.0199 - mae: 0.0836 - val_loss: 0.0172 - val_mae: 0.0774
Epoch 6/50
[1m1011/1011[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0192 - mae: 0.0814 - val_loss: 0.0166 - val_mae: 0.0743
Epoch 7/50
[1m1011/1011[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [None]:

end_time = time.time()

training_time = end_time - start_time


print(f"Training Time: {training_time:.2f} seconds")

In [None]:
y_pred = model.predict(X_test)

y_test_rescaled = scaler_y.inverse_transform(y_test)
y_pred_rescaled = scaler_y.inverse_transform(y_pred)

In [None]:

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
mse = mean_squared_error(y_test_rescaled, y_pred_rescaled)
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
r2 = r2_score(y_test_rescaled, y_pred_rescaled)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R^2 Score: {r2}")


In [None]:
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


correlation_matrix = df.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm", cbar=True)

plt.title("Correlation Matrix Heatmap")
plt.show()

In [None]:
plt.figure(figsize=(12, 6))
plt.plot(df.index[-len(y_test_rescaled):][:200], y_test_rescaled[:200], label="Actual", alpha=0.8)
plt.plot(df.index[-len(y_pred_rescaled):][:200], y_pred_rescaled[:200], label="Predicted", alpha=0.8)
plt.title("LSTM: Actual vs Predicted (Hourly Time Series)")
plt.xlabel("Time (Hourly)")
plt.ylabel("Power Output (kW)")
plt.legend()
plt.grid()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Create a DataFrame with actual, predicted, and timestamps
results_df = pd.DataFrame({
    'Timestamp': df.index[-len(y_test_rescaled):],  # Use the datetime index for test data
    'Actual': y_test_rescaled.flatten(),           # Flatten the arrays for tabular display
    'Predicted': y_pred_rescaled.flatten()
})

# Display the first 200 rows
print(results_df.head(200))

# Save as CSV if needed
results_df.to_csv('predicted_vs_actual.csv', index=False)

In [None]:
import matplotlib.pyplot as plt
hours = range(24)  # Horas en el eje X
plt.figure(figsize=(10, 6))
plt.plot(hours, y_test_rescaled[:24].flatten(), label='Actual', marker='o')
plt.plot(hours, y_pred_rescaled[:24].flatten(), label='Predicted', marker='x')
plt.title('LSTM Wind Power Forecasting (First 24 Hours)')
plt.xlabel('Hours')
plt.ylabel('Wind Power (kW)')
plt.legend()
plt.grid(True)
plt.show()