<a href="https://colab.research.google.com/github/imranow/LSTM_Study/blob/main/LSTM_Study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
from google.colab import files
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error
from math import sqrt
import pandas as pd

uploaded = files.upload()

# Load datasets
train_data = np.loadtxt("train_NREL_solar_data.csv", delimiter=",")
val_data   = np.loadtxt("validate_NREL_solar_data.csv", delimiter=",")
test_data  = np.loadtxt("test_NREL_solar_data.csv", delimiter=",")

# Separate features (X) and target (y)
X_train_all = train_data[:, :9]
y_train_all = train_data[:, -1]
X_val_all   = val_data[:, :9]
y_val_all   = val_data[:, -1]
X_test_all  = test_data[:, :9]
y_test_all  = test_data[:, -1]

# Determine number of days in each set (each day = 11 hours)
N_train_days = X_train_all.shape[0] // 11
N_val_days   = X_val_all.shape[0] // 11
N_test_days  = X_test_all.shape[0] // 11

# Reshape into [days, 11, ...] for convenience
X_train = X_train_all.reshape(N_train_days, 11, 9)
y_train = y_train_all.reshape(N_train_days, 11)
X_val   = X_val_all.reshape(N_val_days, 11, 9)
y_val   = y_val_all.reshape(N_val_days, 11)
X_test  = X_test_all.reshape(N_test_days, 11, 9)
y_test  = y_test_all.reshape(N_test_days, 11)

# Denormalization function for irradiance (assuming min=0, max≈1087.44 W/m²)
orig_min = 0.0
orig_max = 1087.4396  # scaling factor from dataset
def denormalize(y_norm):
    """Convert normalized value in [-1,1] back to original scale."""
    return ((y_norm + 1) / 2) * (orig_max - orig_min) + orig_min


# Prepare array for persistence predictions on test set
y_pred_persist = np.zeros_like(y_test)  # shape (N_test_days, 11)

# Seed the prediction for day 0 of test with the last day of validation
y_pred_persist[0] = y_val[-1]

# For each subsequent test day, use the previous day's actual irradiance
for day in range(1, N_test_days):
    y_pred_persist[day] = y_test[day - 1]

# Compute Test RMSE for persistence (normalized values)
rmse_persist_norm = sqrt(mean_squared_error(y_test.flatten(), y_pred_persist.flatten()))

# Convert RMSE to original units
rmse_persist = rmse_persist_norm * (orig_max - orig_min) / 2.0

print(f"Persistence model – Test RMSE: {rmse_persist:.3f} W/m²")

#--

# Flatten the feature and target arrays for training and testing
X_train_flat = X_train.reshape(-1, 9)   # shape: (N_train_days*11, 9)
y_train_flat = y_train.flatten()       # shape: (N_train_days*11,)
X_test_flat  = X_test.reshape(-1, 9)    # shape: (N_test_days*11, 9)
y_test_flat  = y_test.flatten()        # shape: (N_test_days*11,)

# Train the linear regression model
lr_model = LinearRegression()
lr_model.fit(X_train_flat, y_train_flat)

# Predict on test data
y_pred_test_lr  = lr_model.predict(X_test_flat)

# Compute RMSE for testing (in normalized scale)
rmse_test_lr_norm  = sqrt(mean_squared_error(y_test_flat, y_pred_test_lr))

# Convert RMSE to original units
rmse_test_lr  = rmse_test_lr_norm  * (orig_max - orig_min) / 2.0

print(f"Linear Regression – Test RMSE: {rmse_test_lr:.3f} W/m²")

#--

# Lists to store RMSE values for each run
test_rmse_list = []

bpnn = MLPRegressor(hidden_layer_sizes=(50,), activation='tanh',
                        solver='sgd', max_iter=2500, random_state=i)
bpnn.fit(X_train_flat, y_train_flat)

# Predict on test sets
y_pred_test_bpnn  = bpnn.predict(X_test_flat)

# Compute RMSE (normalized)
rmse_test_bpnn_norm  = sqrt(mean_squared_error(y_test_flat, y_pred_test_bpnn))

test_rmse_list.append(rmse_test_bpnn_norm   * (orig_max - orig_min) / 2.0)

# Compute mean and standard deviation of RMSE across runs
test_rmse_mean  = np.mean(test_rmse_list)
test_rmse_std   = np.std(test_rmse_list)

print(f"BPNN – Test RMSE: {test_rmse_mean:.3f} ± {test_rmse_std:.3f} W/m²")

#--

# LSTM model training and evaluation
rmse2 = 0

model = Sequential()
model.add(LSTM(50, input_shape=(11, 9), return_sequences=True))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam')

# Train the LSTM model
history = model.fit(X_train, y_train, epochs=100, batch_size=50, validation_data=(X_val, y_val))

# Make predictions on the test and training set
yhat = model.predict(X_test)
y_te = yhat.reshape(N_test_days * 11,)

# Calculate RMSE for the LSTM model
rmse2 += mean_squared_error(y_te, y_test_all) * N_test_days * 11

rmse_test_lstm = sqrt(rmse2 / 4026) * (orig_max - orig_min) / 2.0
print('LSTM – Test RMSE: %.3f' % rmse_test_lstm)

#Table of all algorithms RMSEs
data = {
    'Algorithm': ['Persistence', 'Linear Regression', 'BPNN', 'LSTM'],
    'RMSE': [rmse_persist, rmse_test_lr, test_rmse_mean, rmse_test_lstm]
}

rmse_df = pd.DataFrame(data)
rmse_df


Saving figure_7.png to figure_7 (11).png
Saving figure_8.png to figure_8 (11).png
Saving forecast_NREL.py to forecast_NREL (11).py
Saving test_NREL_solar_data.csv to test_NREL_solar_data (11).csv
Saving train_NREL_solar_data.csv to train_NREL_solar_data (11).csv
Saving validate_NREL_solar_data.csv to validate_NREL_solar_data (12).csv
Persistence model – Test RMSE: 208.997 W/m²
Linear Regression – Test RMSE: 218.563 W/m²
BPNN – Test RMSE: 118.032 ± 0.000 W/m²
Epoch 1/100


  super().__init__(**kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - loss: 0.2357 - val_loss: 0.1082
Epoch 2/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0960 - val_loss: 0.0633
Epoch 3/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0579 - val_loss: 0.0448
Epoch 4/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0424 - val_loss: 0.0377
Epoch 5/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0378 - val_loss: 0.0345
Epoch 6/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0327 - val_loss: 0.0307
Epoch 7/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.0298 - val_loss: 0.0290
Epoch 8/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.0295 - val_loss: 0.0285
Epoch 9/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

Unnamed: 0,Algorithm,RMSE
0,Persistence,208.996708
1,Linear Regression,218.56311
2,BPNN,118.032087
3,LSTM,78.780004
