<a href="https://colab.research.google.com/github/imranow/Improved_LSTM/blob/main/Improved_LSTM_Study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --quiet catboost
!pip install keras-tcn
!pip install tensorflow
!pip install scikit-learn
!pip install pandas
!pip install numpy

from google.colab import files
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, InputLayer
from tcn import TCN
from catboost import CatBoostRegressor, Pool

#───────────────────────────────────────────────────────────────────────────────
# 0.  Install CatBoost
#───────────────────────────────────────────────────────────────────────────────
# CatBoost is already installed by the `!pip install --quiet catboost` line at
# the top of this cell, which runs before `from catboost import ...`.
# A second install at this point would run after the import and change
# nothing, so it has been removed.

#───────────────────────────────────────────────────────────────────────────────
# 1. Upload & load datasets
#───────────────────────────────────────────────────────────────────────────────
# Opens the Colab upload widget. The three CSV files below must be selected;
# they are then read from the working directory by name.
uploaded = files.upload()

# Each file is parsed as a purely numeric, comma-delimited matrix.
train_data = np.loadtxt("train_NREL_solar_data.csv", delimiter=",")
val_data   = np.loadtxt("validate_NREL_solar_data.csv", delimiter=",")
test_data  = np.loadtxt("test_NREL_solar_data.csv", delimiter=",")

#───────────────────────────────────────────────────────────────────────────────
# 2. Prepare feature / target arrays
#───────────────────────────────────────────────────────────────────────────────
# Column layout used throughout: the first 9 columns are the input features
# and the last column is the target. Rows are grouped into fixed windows of
# 11 consecutive rows (each "day" = 11 hours), giving inputs of shape
# (days, 11, 9) and targets of shape (days, 11). The reshape raises if a
# split's row count is not a multiple of 11.

# --- training split ---
X_tr_all = train_data[:, :9]
y_tr_all = train_data[:, -1]
N_tr = len(X_tr_all) // 11
X_train = np.reshape(X_tr_all, (N_tr, 11, 9))
y_train = np.reshape(y_tr_all, (N_tr, 11))

# --- validation split ---
X_val_all = val_data[:, :9]
y_val_all = val_data[:, -1]
N_val = len(X_val_all) // 11
X_val = np.reshape(X_val_all, (N_val, 11, 9))
y_val = np.reshape(y_val_all, (N_val, 11))

# --- test split ---
X_te_all = test_data[:, :9]
y_te_all = test_data[:, -1]
N_te = len(X_te_all) // 11
X_test = np.reshape(X_te_all, (N_te, 11, 9))
y_test = np.reshape(y_te_all, (N_te, 11))

#───────────────────────────────────────────────────────────────────────────────
# 3. Denormalization
#───────────────────────────────────────────────────────────────────────────────
# Bounds of the target in its original units (reported as W/m² below).
orig_min = 0.0
orig_max = 1087.4396


def denormalize(x):
    """Map a normalized value back to the original target scale.

    The transform is linear: -1 maps to ``orig_min`` and +1 maps to
    ``orig_max``. Only arithmetic operators are used, so ``x`` may be a
    scalar or a NumPy array (applied element-wise).

    Args:
        x: Normalized value(s).

    Returns:
        The value(s) rescaled to the ``[orig_min, orig_max]`` range.
    """
    # First shift/scale from [-1, 1] to [0, 1], then stretch to the real range.
    unit_fraction = (x + 1) / 2
    return unit_fraction * (orig_max - orig_min) + orig_min

#───────────────────────────────────────────────────────────────────────────────
# 4. Persistence
#───────────────────────────────────────────────────────────────────────────────
# Persistence baseline: the forecast for each test day is the previous day's
# observed profile. The first test day has no predecessor in the test split,
# so it uses the last day of the validation split.
y_pred_p = np.empty_like(y_test)
y_pred_p[0] = y_val[-1]
y_pred_p[1:] = y_test[:-1]

# The error is measured on normalized values and then multiplied by half the
# original range, which is the slope of denormalize().
mse_p = mean_squared_error(y_test.ravel(), y_pred_p.ravel())
rmse_p = sqrt(mse_p) * (orig_max - orig_min) / 2
print(f"Persistence – RMSE: {rmse_p:.3f} W/m²")

#───────────────────────────────────────────────────────────────────────────────
# 5. Linear Regression
#───────────────────────────────────────────────────────────────────────────────
# Flatten the (days, 11, 9) windows back to one sample per row so that
# non-sequential models can consume them; targets become 1-D vectors.
X_tr_flat = X_train.reshape(-1, 9)
y_tr_flat = y_train.flatten()
X_te_flat = X_test.reshape(-1, 9)
y_te_flat = y_test.flatten()

# Ordinary least squares on the flattened samples.
lr = LinearRegression()
lr.fit(X_tr_flat, y_tr_flat)

# RMSE on normalized values, rescaled by half the original range.
lr_pred = lr.predict(X_te_flat)
rmse_lr = sqrt(mean_squared_error(y_te_flat, lr_pred)) * (orig_max - orig_min) / 2
print(f"Linear Regression – RMSE: {rmse_lr:.3f} W/m²")

#───────────────────────────────────────────────────────────────────────────────
# 6. BPNN
#───────────────────────────────────────────────────────────────────────────────
# Back-propagation neural network: two tanh hidden layers (25 and 15 units)
# trained with plain SGD; the seed is fixed so the fit is repeatable.
bpnn_params = dict(
    hidden_layer_sizes=(25, 15),
    activation='tanh',
    solver='sgd',
    max_iter=2500,
    random_state=42,
)
bpnn = MLPRegressor(**bpnn_params)
bpnn.fit(X_tr_flat, y_tr_flat)

# RMSE on normalized values, rescaled by half the original range.
bpnn_pred = bpnn.predict(X_te_flat)
rmse_bpnn = sqrt(mean_squared_error(y_te_flat, bpnn_pred)) * (orig_max - orig_min) / 2
print(f"BPNN – RMSE: {rmse_bpnn:.3f} W/m²")

#───────────────────────────────────────────────────────────────────────────────
# 7. LSTM (100 epochs)
#───────────────────────────────────────────────────────────────────────────────
# Sequence-to-sequence LSTM: return_sequences=True keeps an output for every
# one of the 11 timesteps, and the Dense(1) head turns each into one value.
lstm_layers = [
    LSTM(50, return_sequences=True, input_shape=(11, 9)),
    Dense(1, activation='linear'),
]
lstm = Sequential(lstm_layers)
lstm.compile(optimizer='adam', loss='mse')

# The validation split is passed for monitoring only (no callbacks are used).
lstm.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=100,
    validation_data=(X_val, y_val),
    verbose=1,
)

# Flatten predictions to line up with the un-windowed test targets, then
# rescale the normalized RMSE by half the original range.
yhat = lstm.predict(X_test).reshape(-1)
mse_lstm = mean_squared_error(yhat, y_te_all)
rmse_lstm = sqrt(mse_lstm) * (orig_max - orig_min) / 2
print(f"LSTM – RMSE: {rmse_lstm:.3f} W/m²")

#───────────────────────────────────────────────────────────────────────────────
# 8. TCN (100 epochs)
#───────────────────────────────────────────────────────────────────────────────
# Temporal convolutional network: return_sequences=False makes the TCN emit
# a single vector per window, which Dense(11) maps to the 11 targets.
tcn_block = TCN(
    nb_filters=128,
    kernel_size=2,
    dilations=[1, 2, 4, 8],
    activation='relu',
    dropout_rate=0.2,
    use_skip_connections=True,
    return_sequences=False,
)
tcn_model = Sequential([
    InputLayer(input_shape=(11, 9)),
    tcn_block,
    Dense(11, activation='linear'),
])
tcn_model.compile(optimizer='adam', loss='mse')

# The validation split is passed for monitoring only (no callbacks are used).
tcn_model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_data=(X_val, y_val),
    verbose=1,
)

# Flatten predictions to line up with the un-windowed test targets, then
# rescale the normalized RMSE by half the original range.
y_tcn = tcn_model.predict(X_test).reshape(-1)
mse_tcn = mean_squared_error(y_tcn, y_te_all)
rmse_tcn = sqrt(mse_tcn) * (orig_max - orig_min) / 2
print(f"TCN – RMSE: {rmse_tcn:.3f} W/m²")

#───────────────────────────────────────────────────────────────────────────────
# 9. CatBoost
#───────────────────────────────────────────────────────────────────────────────
# Gradient-boosted trees on the flattened (one row per sample) data.
train_pool = Pool(X_tr_flat, y_tr_flat)

# Early stopping must be driven by the validation split. Using the test split
# as eval_set would let the test data choose the number of trees and make the
# reported test RMSE optimistically biased.
val_pool   = Pool(X_val.reshape(-1, 9), y_val.flatten())

cat = CatBoostRegressor(
    iterations            = 500,
    learning_rate         = 0.1,
    depth                 = 6,
    loss_function         = 'RMSE',
    eval_metric           = 'RMSE',
    random_seed           = 42,
    early_stopping_rounds = 20,   # stop after 20 rounds without eval improvement
    verbose               = False
)
cat.fit(train_pool, eval_set=val_pool)

# The test split is touched only here, for the final score. Predictions and
# targets are mapped back to the original units before computing RMSE.
y_cat_norm = cat.predict(X_te_flat)
y_cat_w    = denormalize(y_cat_norm)
y_true_w   = denormalize(y_te_flat)
rmse_cat = sqrt(mean_squared_error(y_true_w, y_cat_w))
print(f"CatBoost – RMSE: {rmse_cat:.3f} W/m²")

#───────────────────────────────────────────────────────────────────────────────
# 10. Summary
#───────────────────────────────────────────────────────────────────────────────
# One (label, score) pair per model, in the order they were evaluated.
summary_rows = [
    ('Persistence', rmse_p),
    ('Linear Regression', rmse_lr),
    ('BPNN', rmse_bpnn),
    ('LSTM (100ep)', rmse_lstm),
    ('TCN (100ep)', rmse_tcn),
    ('CatBoost', rmse_cat),
]
rmse_df = pd.DataFrame(summary_rows, columns=['Algorithm', 'RMSE (W/m²)'])

# Print without the index column so only labels and scores are shown.
print("\nFinal RMSE comparison:")
print(rmse_df.to_string(index=False))

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras-tcn
  Downloading keras_tcn-3.5.6-py3-none-any.whl.metadata (13 kB)
Downloading keras_tcn-3.5.6-py3-none-any.whl (12 kB)
Installing collected packages: keras-tcn
Successfully installed keras-tcn-3.5.6


Saving test_NREL_solar_data.csv to test_NREL_solar_data.csv
Saving train_NREL_solar_data.csv to train_NREL_solar_data.csv
Saving validate_NREL_solar_data.csv to validate_NREL_solar_data.csv
Persistence – RMSE: 208.997 W/m²
Linear Regression – RMSE: 218.563 W/m²
BPNN – RMSE: 101.003 W/m²
Epoch 1/100


  super().__init__(**kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.2298 - val_loss: 0.0986
Epoch 2/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0904 - val_loss: 0.0569
Epoch 3/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0538 - val_loss: 0.0407
Epoch 4/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.0405 - val_loss: 0.0366
Epoch 5/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.0345 - val_loss: 0.0324
Epoch 6/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0320 - val_loss: 0.0303
Epoch 7/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0305 - val_loss: 0.0306
Epoch 8/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0290 - val_loss: 0.0275
Epoch 9/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 87ms/step - loss: 5.2674 - val_loss: 0.1508
Epoch 2/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 61ms/step - loss: 0.2859 - val_loss: 0.1084
Epoch 3/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 59ms/step - loss: 0.1918 - val_loss: 0.0928
Epoch 4/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 71ms/step - loss: 0.1491 - val_loss: 0.0836
Epoch 5/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 70ms/step - loss: 0.1244 - val_loss: 0.0724
Epoch 6/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 59ms/step - loss: 0.1074 - val_loss: 0.0698
Epoch 7/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 81ms/step - loss: 0.0954 - val_loss: 0.0649
Epoch 8/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 58ms/step - loss: 0.0859 - val_loss: 0.0581
Epoch 9/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━

In [None]:
from tensorflow.keras.models     import Sequential
from tensorflow.keras.layers     import Conv1D, LSTM, TimeDistributed, Dense
from sklearn.metrics             import mean_squared_error
from math                        import sqrt
import numpy as np

# CNN–LSTM hybrid. Layer stack, in order:
#   1. two causal Conv1D layers over the 11-step sequence (causal padding
#      keeps the sequence length at 11);
#   2. an LSTM that returns its full output sequence;
#   3. a per-timestep Dense(1) head producing one value per step.
cnn_lstm_layers = [
    Conv1D(64, kernel_size=3, activation='relu', padding='causal',
           input_shape=(11, 9)),
    Conv1D(32, kernel_size=3, activation='relu', padding='causal'),
    LSTM(100, return_sequences=True),
    TimeDistributed(Dense(1, activation='linear')),
]
cnn_lstm = Sequential(cnn_lstm_layers)
cnn_lstm.compile(optimizer='adam', loss='mse')

# Same training budget as the plain LSTM: 100 epochs, batch size 50.
# The validation split is passed for monitoring only (no callbacks are used).
cnn_lstm.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=100,
    validation_data=(X_val, y_val),
    verbose=1,
)

# Make predictions and flatten to one value per test sample
y_pred_norm = cnn_lstm.predict(X_test).reshape(-1)
y_true_norm = y_test.reshape(-1)

# Denormalize back to W/m²
y_pred_w = denormalize(y_pred_norm)
y_true_w = denormalize(y_true_norm)

# Compute RMSE in original units
rmse_cnn_lstm = sqrt(mean_squared_error(y_true_w, y_pred_w))
print(f"CNN–LSTM – Test RMSE: {rmse_cnn_lstm:.3f} W/m²")

# Record the score in the comparison table.
# The value is stored as a float (not a formatted string) so the RMSE column
# stays numeric and can be sorted/compared. If this cell is re-run, the
# existing row is updated instead of appending a duplicate.
cnn_lstm_rows = rmse_df['Algorithm'] == "CNN–LSTM"
if cnn_lstm_rows.any():
    rmse_df.loc[cnn_lstm_rows, 'RMSE (W/m²)'] = rmse_cnn_lstm
else:
    rmse_df.loc[len(rmse_df)] = ["CNN–LSTM", rmse_cnn_lstm]
rmse_df

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 92ms/step - loss: 0.2046 - val_loss: 0.0596
Epoch 2/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 78ms/step - loss: 0.0478 - val_loss: 0.0315
Epoch 3/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 59ms/step - loss: 0.0281 - val_loss: 0.0252
Epoch 4/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 0.0243 - val_loss: 0.0242
Epoch 5/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.0231 - val_loss: 0.0238
Epoch 6/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0201 - val_loss: 0.0270
Epoch 7/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 0.0208 - val_loss: 0.0219
Epoch 8/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.0208 - val_loss: 0.0223
Epoch 9/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━

Unnamed: 0,Algorithm,RMSE (W/m²)
0,Persistence,208.996708
1,Linear Regression,218.56311
2,BPNN,101.002666
3,LSTM (100ep),75.615541
4,TCN (100ep),88.316993
5,CatBoost,82.430203
6,CNN–LSTM,76.13


In [None]:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from math import sqrt

# 1) Meta-features come from the validation split, so the combiner is
#    trained without looking at test data. Each base model contributes one
#    flat prediction vector with one entry per validation sample.
val_lstm_n = lstm.predict(X_val).reshape(-1)
val_cat_n = cat.predict(X_val.reshape(-1, 9))
val_cnnlstm_n = cnn_lstm.predict(X_val).reshape(-1)

# One column per base model -> shape (n_samples, 3)
val_base_preds = (val_lstm_n, val_cat_n, val_cnnlstm_n)
X_meta_train = np.vstack(val_base_preds).T
y_meta_train = y_val.reshape(-1)

# 2) Ridge regression learns how to weight the three base predictions
meta = Ridge(alpha=1.0)
meta.fit(X_meta_train, y_meta_train)

# 3) Base-model predictions on the test split, in the same column order
#    that was used to train the meta-learner (LSTM, CatBoost, CNN–LSTM)
test_lstm_n = lstm.predict(X_test).reshape(-1)
test_cat_n = cat.predict(X_test.reshape(-1, 9))
test_cnnlstm_n = cnn_lstm.predict(X_test).reshape(-1)

test_base_preds = (test_lstm_n, test_cat_n, test_cnnlstm_n)
X_meta_test = np.vstack(test_base_preds).T

# 4) Combine in normalized space, then map back to the original units
pred_meta_n = meta.predict(X_meta_test)
pred_meta_w = denormalize(pred_meta_n)
true_w = denormalize(y_test.reshape(-1))

# 5) RMSE in original units
mse_meta = mean_squared_error(true_w, pred_meta_w)
rmse_meta = sqrt(mse_meta)
print(f"Meta-Learner (LSTM+CatBoost+CNN–LSTM) – Test RMSE: {rmse_meta:.3f} W/m²")

# 6) Record the score in the comparison table.
#    The value is stored as a float (not a formatted string) so the RMSE
#    column stays numeric and can be sorted/compared. If this cell is re-run,
#    the existing row is updated instead of appending a duplicate.
meta_label = "Meta (LSTM+CatBoost+CNN–LSTM)"
meta_rows = rmse_df['Algorithm'] == meta_label
if meta_rows.any():
    rmse_df.loc[meta_rows, 'RMSE (W/m²)'] = rmse_meta
else:
    rmse_df.loc[len(rmse_df)] = [meta_label, rmse_meta]
print(rmse_df)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Meta-Learner (LSTM+CatBoost+CNN–LSTM) – Test RMSE: 69.402 W/m²
                       Algorithm RMSE (W/m²)
0                    Persistence  208.996708
1              Linear Regression   218.56311
2                           BPNN  101.002666
3                   LSTM (100ep)   75.615541
4                    TCN (100ep)   88.316993
5                       CatBoost   82.430203
6                       CNN–LSTM      76.130
7  Meta (LSTM+CatBoost+CNN–LSTM)      69.402
