# Wind energy production prediction

## Importing packages 

In [2]:
import pandas as pd
import numpy as np 
import tensorflow as tf
import matplotlib.pyplot as plt

# Never truncate columns when a wide DataFrame is displayed
pd.set_option('display.max_columns', None)

# Report the library versions of the running kernel.
# Versions noted by the author: pandas 2.2.3, numpy 1.26.4, tensorflow 2.18.0
# (the recorded output of this cell shows pandas 2.2.2).
print(f'Pandas version: {pd.__version__}')
print(f'Numpy version: {np.__version__}')
print(f'TensorFlow version: {tf.__version__}')

Pandas version: 2.2.2
Numpy version: 1.26.4
TensorFlow version: 2.18.0


## Loading the data

In [3]:
# Hourly production records joined with weather observations (path relative to the notebook)
production_csv = '../data/Model/production_wind_data.csv'
df = pd.read_csv(production_csv)
df

Unnamed: 0,datetime,point,type,granularity,timezone,activity,classification,capacity,volume,percentage,emission,emissionfactor,Station,WindDirection,WindSpeedAvg60min,WindSpeedAvg10min,WindGust,Temperature,DewPoint,Sunshineperhour,GlobalRadiation,PrecipitationDuration,HourlyPrecipitationAmount,Pressure,HorizontalVisibility,CloudCover,RelativeAtmosphericHumidity,IndicatorWeatherCode,Fog,Rain,Snow,Thunder,IceFormation,AvgDailyTemperature,TemperatureAvgMonthlyTemperature
0,2021-01-01 00:00:00,Nederland,Wind,Hour,UTC,Providing,Current,95906,95906,0.026977,0,0.0,260,230,10,10,20,-2.5,-26,0,0,0,0,10059,1,0,98,7,1,0,0,0,1,2.970833,3.376747
1,2021-01-01 01:00:00,Nederland,Wind,Hour,UTC,Providing,Current,122117,122117,0.034349,0,0.0,260,230,10,10,20,-3.2,-34,0,0,0,0,10061,0,1,98,7,1,0,0,0,1,2.970833,3.376747
2,2021-01-01 02:00:00,Nederland,Wind,Hour,UTC,Providing,Current,87353,87353,0.024571,0,0.0,260,230,20,20,30,-2.7,-29,0,0,0,0,10064,4,7,98,7,1,0,0,0,1,2.970833,3.376747
3,2021-01-01 03:00:00,Nederland,Wind,Hour,UTC,Providing,Current,59687,59687,0.016788,0,0.0,260,220,20,20,30,-1.1,-14,0,0,0,0,10064,15,8,98,7,1,0,0,0,1,2.970833,3.376747
4,2021-01-01 04:00:00,Nederland,Wind,Hour,UTC,Providing,Current,53359,53359,0.015008,0,0.0,260,230,20,20,40,1.1,5,0,0,0,0,10064,18,8,95,7,0,0,0,0,0,2.970833,3.376747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52553,2023-12-31 18:00:00,Nederland,WindOffshoreC,Hour,UTC,Providing,Current,3144000,3144000,1.431400,0,0.0,260,200,70,60,150,8.6,51,0,0,0,-1,9950,75,8,78,7,0,1,0,0,0,8.829167,6.888441
52554,2023-12-31 19:00:00,Nederland,WindOffshoreC,Hour,UTC,Providing,Current,3102250,3102250,1.412392,0,0.0,260,200,80,70,140,8.6,50,0,0,0,-1,9951,75,8,77,7,0,1,0,0,0,8.829167,6.888441
52555,2023-12-31 20:00:00,Nederland,WindOffshoreC,Hour,UTC,Providing,Current,3086249,3086249,1.405107,0,0.0,260,200,70,70,150,8.2,56,0,0,0,-1,9954,70,8,84,7,0,1,0,0,0,8.829167,6.888441
52556,2023-12-31 21:00:00,Nederland,WindOffshoreC,Hour,UTC,Providing,Current,3051749,3051749,1.389403,0,0.0,260,200,70,70,140,8.1,56,0,0,3,2,9955,70,8,84,7,0,1,0,0,0,8.829167,6.888441


## Modeling

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential

# Data preparation
# This cell needs the `df` created by the "Loading the data" cell; run that cell first
# (starting a fresh kernel here fails with NameError: name 'df' is not defined).
# NOTE(review): the displayed frame contains more than one `type` value
# (e.g. Wind and WindOffshoreC). The windows and the chronological split below treat the
# rows as one continuous series — confirm whether df should be filtered per type first.
if 'datetime' not in df.columns:
    # set_index below removes the 'datetime' column; restore it from the index on a re-run
    df['datetime'] = df.index

df['datetime'] = pd.to_datetime(df['datetime'])
df.set_index('datetime', inplace=True)

# Features and target selection
features = ['WindSpeedAvg60min', 'Temperature', 'Pressure', 'RelativeAtmosphericHumidity']
target = 'capacity'

# Scale the data
# Fit the scalers on the leading 80 % of rows only, then transform every row.
# Fitting on the full frame would let the min/max of the held-out test period leak
# into the training data. The leading 80 % of rows all fall inside the training
# windows produced by the 80/20 sequence split further down.
# Values from the test period may therefore scale outside [0, 1]; that is expected.
train_fraction = 0.8
fit_rows = df.iloc[:int(train_fraction * len(df))]

feature_scaler = MinMaxScaler().fit(fit_rows[features])
target_scaler = MinMaxScaler().fit(fit_rows[[target]])

scaled_features = feature_scaler.transform(df[features])
scaled_target = target_scaler.transform(df[[target]])

# Inputs first, target last: create_sequences expects the target in the final column
scaled_data = np.hstack([scaled_features, scaled_target])

# Create sequences
def create_sequences(data, seq_length):
    """Slice a 2-D array into sliding windows for one-step-ahead prediction.

    Each sample pairs ``seq_length`` consecutive rows of every column except the
    last (the inputs) with the last-column value of the row immediately after
    that window (the target).

    Args:
        data: 2-D array-like of shape ``(n_rows, n_columns)``. The last column
            is the target; all preceding columns are inputs.
        seq_length: Number of consecutive rows in each window; must be >= 0.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays with shapes
        ``(n_samples, seq_length, n_columns - 1)`` and ``(n_samples,)``, where
        ``n_samples = max(n_rows - seq_length, 0)``. When there are too few rows
        for a single window, the arrays are empty but keep those shapes.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    data = np.asarray(data)
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    n_samples = max(len(data) - seq_length, 0)
    if n_samples == 0:
        # Keep the window and feature axes so downstream slicing and shape checks still work
        n_inputs = max(data.shape[1] - 1, 0)
        return (
            np.empty((0, seq_length, n_inputs), dtype=data.dtype),
            np.empty((0,), dtype=data.dtype),
        )

    # Window `start` covers rows [start, start + seq_length); its target is the next row
    xs = np.stack([data[start:start + seq_length, :-1] for start in range(n_samples)])
    ys = data[seq_length:, -1].copy()
    return xs, ys

# Window size: each sample holds the previous 24 hourly rows and predicts the following hour
seq_length = 24
X, y = create_sequences(scaled_data, seq_length)

# Chronological hold-out: the first 80 % of windows train the model, the rest test it
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# LSTM model
# Two stacked LSTM layers with dropout after each, followed by a single linear output unit.
# The input shape is declared with an explicit Input layer: passing `input_shape` to the
# first layer of a Sequential model is deprecated in Keras 3 and triggers a UserWarning.
model = Sequential([
    Input(shape=(seq_length, len(features))),
    # return_sequences=True so the second LSTM receives the full sequence, not just the last step
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(1)
])

# Compile the model: optimise mean squared error, also report mean absolute error
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model; validation_split holds out a 0.2 fraction of the training arrays for validation
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.2,
    verbose=2
)

# Predict the held-out windows (still on the scaled target range)
predictions = model.predict(X_test)

# Map predictions and ground truth back to the original units of the target column.
# The scaler expects a 2-D column, so the 1-D y_test gets a trailing axis first.
predicted_capacity = target_scaler.inverse_transform(predictions)
y_test_column = y_test[:, np.newaxis]
actual_capacity = target_scaler.inverse_transform(y_test_column).flatten()

# Plot training history: per-epoch training and validation loss recorded by model.fit
loss_fig, loss_ax = plt.subplots(figsize=(14, 5))
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.grid(True)
plt.show()

# Plot Predictions vs Actual Values on the test set, in the target's original units.
# No datetime values are passed, so the x-axis is the sample position within the test set.
cmp_fig, cmp_ax = plt.subplots(figsize=(14, 5))
cmp_ax.plot(actual_capacity, label='Actual Capacity', alpha=0.7)
cmp_ax.plot(predicted_capacity, label='Predicted Capacity', alpha=0.7)
cmp_ax.set_title('Actual vs Predicted Capacity')
cmp_ax.set_xlabel('Time')
cmp_ax.set_ylabel('Capacity')
cmp_ax.legend()
cmp_ax.grid(True)
plt.show()




NameError: name 'df' is not defined