In [22]:
import pandas as pd
df_time = pd.read_csv('time_series_60min_singleindex.csv')
df_pv = pd.read_csv('ninja_pv_wind_profiles_singleindex.csv')
df_weather = pd.read_csv('weather_data.csv')

In [23]:
df_time = df_time[['utc_timestamp', 'DE_solar_generation_actual', 'DE_solar_capacity']]
df_time.rename(columns={'utc_timestamp': 'time'}, inplace=True)
df_time.head()

Unnamed: 0,time,DE_solar_generation_actual,DE_solar_capacity
0,2014-12-31T23:00:00Z,,37248.0
1,2015-01-01T00:00:00Z,,37248.0
2,2015-01-01T01:00:00Z,,37248.0
3,2015-01-01T02:00:00Z,,37248.0
4,2015-01-01T03:00:00Z,,37248.0


In [24]:
columns = ['utc_timestamp', 'DE_temperature', 
                    'DE_radiation_direct_horizontal', 'DE_radiation_diffuse_horizontal']

df_weather = df_weather[columns]
df_weather.rename(columns={'utc_timestamp': 'time'}, inplace=True)
df_weather.head()

Unnamed: 0,time,DE_temperature,DE_radiation_direct_horizontal,DE_radiation_diffuse_horizontal
0,1980-01-01T00:00:00Z,-1.261,0.0,0.0
1,1980-01-01T01:00:00Z,-1.414,0.0,0.0
2,1980-01-01T02:00:00Z,-1.571,0.0,0.0
3,1980-01-01T03:00:00Z,-1.76,0.0,0.0
4,1980-01-01T04:00:00Z,-1.995,0.0,0.0


In [25]:
columns = ['time', 'DE_pv_national_current']

df_pv = df_pv[columns]
df_pv.head()

Unnamed: 0,time,DE_pv_national_current
0,1980-01-01T00:00:00Z,0.0
1,1980-01-01T01:00:00Z,0.0
2,1980-01-01T02:00:00Z,0.0
3,1980-01-01T03:00:00Z,0.0
4,1980-01-01T04:00:00Z,0.0


In [26]:
df_weather['time'] = pd.to_datetime(df_weather['time']).dt.tz_localize(None)
df_pv['time'] = pd.to_datetime(df_pv['time']).dt.tz_localize(None)
df_time['time'] = pd.to_datetime(df_time['time']).dt.tz_localize(None)

In [27]:
df_timeseries = pd.merge(df_time, df_pv, on='time', how='inner')
df = pd.merge(df_timeseries, df_weather, on='time', how='inner')
df.head()

Unnamed: 0,time,DE_solar_generation_actual,DE_solar_capacity,DE_pv_national_current,DE_temperature,DE_radiation_direct_horizontal,DE_radiation_diffuse_horizontal
0,2014-12-31 23:00:00,,37248.0,0.0,-0.94,0.0,0.0
1,2015-01-01 00:00:00,,37248.0,0.0,-0.981,0.0,0.0
2,2015-01-01 01:00:00,,37248.0,0.0,-1.035,0.0,0.0
3,2015-01-01 02:00:00,,37248.0,0.0,-1.109,0.0,0.0
4,2015-01-01 03:00:00,,37248.0,0.0,-1.166,0.0,0.0


In [28]:
df.fillna(0, inplace=True)

In [29]:
df.to_csv('dataset.csv', index=False)

In [30]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

In [31]:
# Handle missing values
df['DE_solar_generation_actual'] = df['DE_solar_generation_actual'].ffill()


In [32]:
# Select features
features = ['DE_solar_capacity', 'DE_pv_national_current', 'DE_temperature', 
            'DE_radiation_direct_horizontal', 'DE_radiation_diffuse_horizontal']
target = 'DE_solar_generation_actual'

# Scale features
feature_scaler = MinMaxScaler()
df[features] = feature_scaler.fit_transform(df[features])

# Scale target
target_scaler = MinMaxScaler()
df[[target]] = target_scaler.fit_transform(df[[target]])


In [33]:

# Convert to sequences
def create_sequences(df, target_col, timesteps):
    X, y = [], []
    for i in range(len(df) - timesteps):
        X.append(df[features].iloc[i:i+timesteps].values)
        y.append(df[target_col].iloc[i+timesteps])
    return np.array(X), np.array(y)

timesteps = 24  # Use 24 hours of data to predict the next hour
X, y = create_sequences(df, target, timesteps)

In [34]:

# Train-test split
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


In [35]:

# Build LSTM model
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(timesteps, len(features))),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)


Epoch 1/50


  super().__init__(**kwargs)


[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 14ms/step - loss: 0.0120 - val_loss: 0.0032
Epoch 2/50
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 13ms/step - loss: 0.0032 - val_loss: 0.0027
Epoch 3/50
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 0.0028 - val_loss: 0.0023
Epoch 4/50
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - loss: 0.0025 - val_loss: 0.0023
Epoch 5/50
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 19ms/step - loss: 0.0024 - val_loss: 0.0023
Epoch 6/50
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 18ms/step - loss: 0.0023 - val_loss: 0.0021
Epoch 7/50
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 21ms/step - loss: 0.0022 - val_loss: 0.0018
Epoch 8/50
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 19ms/step - loss: 0.0022 - val_loss: 0.0022
Epoch 9/50
[1m876/876[0m [32m━━━

In [36]:

# Evaluate the model
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")




[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
RMSE: 0.043199008979066165
