In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense, LSTM, GRU, SimpleRNN, Conv1D, MaxPooling1D,
    Flatten, Bidirectional
)
from tensorflow.keras.callbacks import EarlyStopping
import joblib

# ------------------------ Configuration ------------------------
selected_model = "all"  # Choose: "LSTM", "GRU", "BiLSTM", "CNN", "CNN-LSTM", "RNN", or "all"
EPOCHS = 50
BATCH_SIZE = 32
SEQUENCE_LENGTH = 24

# ------------------------ Load and Preprocess Data ------------------------
df = pd.read_csv("final_dataset_air.csv")
df.dropna(subset=['NowCast Conc.', 'Raw Conc.', 'AQI'], inplace=True)

X = df[['NowCast Conc.', 'Raw Conc.']].values
y = df[['AQI']].values

scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

X_seq, y_seq = [], []
for i in range(SEQUENCE_LENGTH, len(X_scaled)):
    X_seq.append(X_scaled[i-SEQUENCE_LENGTH:i])
    y_seq.append(y_scaled[i])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

split = int(0.8 * len(X_seq))
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]

# ------------------------ Define Model Factory ------------------------
def build_model(model_name):
    model = Sequential()
    
    if model_name == "LSTM":
        model.add(LSTM(64, input_shape=(SEQUENCE_LENGTH, 2)))
    elif model_name == "GRU":
        model.add(GRU(64, input_shape=(SEQUENCE_LENGTH, 2)))
    elif model_name == "BiLSTM":
        model.add(Bidirectional(LSTM(64), input_shape=(SEQUENCE_LENGTH, 2)))
    elif model_name == "RNN":
        model.add(SimpleRNN(64, input_shape=(SEQUENCE_LENGTH, 2)))
    elif model_name == "CNN":
        model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(SEQUENCE_LENGTH, 2)))
        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
    elif model_name == "CNN-LSTM":
        model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(SEQUENCE_LENGTH, 2)))
        model.add(MaxPooling1D(pool_size=2))
        model.add(LSTM(50))
    else:
        raise ValueError("Invalid model name")

    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# ------------------------ Training Function ------------------------
def train_and_save_model(model_name):
    print(f"\n🔧 Training {model_name}...")
    model = build_model(model_name)

    early_stop = EarlyStopping(patience=5, restore_best_weights=True)
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[early_stop],
        verbose=1
    )

    os.makedirs("model", exist_ok=True)
    model.save(f"model/{model_name.lower().replace('-', '_')}_model.h5")
    print(f"✅ Saved model: model/{model_name.lower().replace('-', '_')}_model.h5")

# ------------------------ Save Scalers Once ------------------------
os.makedirs("model", exist_ok=True)
joblib.dump(scaler_x, "model/scaler_x.save")
joblib.dump(scaler_y, "model/scaler_y.save")

# ------------------------ Run Training ------------------------
model_list = ["LSTM", "GRU", "BiLSTM", "CNN", "CNN-LSTM", "RNN"]

if selected_model.lower() == "all":
    for m in model_list:
        train_and_save_model(m)
else:
    if selected_model not in model_list:
        raise ValueError(f"Invalid model: {selected_model}")
    train_and_save_model(selected_model)



🔧 Training LSTM...
Epoch 1/50


  super().__init__(**kwargs)


[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - loss: 0.0029 - mae: 0.0362 - val_loss: 7.2391e-04 - val_mae: 0.0194
Epoch 2/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - loss: 8.8906e-04 - mae: 0.0213 - val_loss: 6.9388e-04 - val_mae: 0.0188
Epoch 3/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 8.2441e-04 - mae: 0.0203 - val_loss: 6.3184e-04 - val_mae: 0.0174
Epoch 4/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 7.4519e-04 - mae: 0.0186 - val_loss: 5.8731e-04 - val_mae: 0.0165
Epoch 5/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 7ms/step - loss: 6.4576e-04 - mae: 0.0172 - val_loss: 5.6157e-04 - val_mae: 0.0161
Epoch 6/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - loss: 6.3512e-04 - mae: 0.0168 - val_loss: 5.4665e-04 - val_mae: 0.0157
Epoch 7/50
[1m1276/1276[0m [32m━━━



✅ Saved model: model/lstm_model.h5

🔧 Training GRU...
Epoch 1/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 10ms/step - loss: 0.0021 - mae: 0.0317 - val_loss: 7.6248e-04 - val_mae: 0.0205
Epoch 2/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 10ms/step - loss: 8.2586e-04 - mae: 0.0210 - val_loss: 7.2926e-04 - val_mae: 0.0197
Epoch 3/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - loss: 7.4615e-04 - mae: 0.0197 - val_loss: 6.7906e-04 - val_mae: 0.0183
Epoch 4/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 10ms/step - loss: 7.2485e-04 - mae: 0.0186 - val_loss: 6.4312e-04 - val_mae: 0.0166
Epoch 5/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 10ms/step - loss: 6.6348e-04 - mae: 0.0179 - val_loss: 5.7205e-04 - val_mae: 0.0157
Epoch 6/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - loss: 6.5696e-04 - mae: 0.0173 - val_l



✅ Saved model: model/gru_model.h5

🔧 Training BiLSTM...
Epoch 1/50


  super().__init__(**kwargs)


[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 13ms/step - loss: 0.0024 - mae: 0.0339 - val_loss: 7.9477e-04 - val_mae: 0.0212
Epoch 2/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - loss: 8.9330e-04 - mae: 0.0212 - val_loss: 6.5723e-04 - val_mae: 0.0185
Epoch 3/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - loss: 7.7233e-04 - mae: 0.0197 - val_loss: 6.1558e-04 - val_mae: 0.0170
Epoch 4/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - loss: 6.8329e-04 - mae: 0.0184 - val_loss: 6.0926e-04 - val_mae: 0.0172
Epoch 5/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - loss: 6.5442e-04 - mae: 0.0177 - val_loss: 5.9094e-04 - val_mae: 0.0163
Epoch 6/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - loss: 5.9214e-04 - mae: 0.0169 - val_loss: 5.5542e-04 - val_mae: 0.0155
Epoch 7/50
[1m1276/1276[0m [



✅ Saved model: model/bilstm_model.h5

🔧 Training CNN...
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.0014 - mae: 0.0244 - val_loss: 5.7624e-04 - val_mae: 0.0156
Epoch 2/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 5.7415e-04 - mae: 0.0158 - val_loss: 5.4712e-04 - val_mae: 0.0144
Epoch 3/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 6.0801e-04 - mae: 0.0158 - val_loss: 5.0309e-04 - val_mae: 0.0140
Epoch 4/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 5.2076e-04 - mae: 0.0150 - val_loss: 4.9247e-04 - val_mae: 0.0134
Epoch 5/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 5.3561e-04 - mae: 0.0147 - val_loss: 4.8544e-04 - val_mae: 0.0133
Epoch 6/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 5.5342e-04 - mae: 0.0147 - val_loss: 4.8917e-04 - val_mae: 0.0140
Epoch 7/50
[1m1276/1276[0m [32m━━━━━━━━━



✅ Saved model: model/cnn_model.h5

🔧 Training CNN-LSTM...
Epoch 1/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - loss: 0.0027 - mae: 0.0330 - val_loss: 8.5394e-04 - val_mae: 0.0224
Epoch 2/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 6.8429e-04 - mae: 0.0179 - val_loss: 6.4584e-04 - val_mae: 0.0179
Epoch 3/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 6.8176e-04 - mae: 0.0174 - val_loss: 5.6956e-04 - val_mae: 0.0152
Epoch 4/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 6.1227e-04 - mae: 0.0164 - val_loss: 5.3711e-04 - val_mae: 0.0149
Epoch 5/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 6.0952e-04 - mae: 0.0162 - val_loss: 5.2330e-04 - val_mae: 0.0148
Epoch 6/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 6.0063e-04 - mae: 0.0161 - val_loss: 5.



✅ Saved model: model/cnn_lstm_model.h5

🔧 Training RNN...
Epoch 1/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0051 - mae: 0.0381 - val_loss: 7.6060e-04 - val_mae: 0.0202
Epoch 2/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 9.7389e-04 - mae: 0.0229 - val_loss: 0.0012 - val_mae: 0.0268
Epoch 3/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 8.7928e-04 - mae: 0.0217 - val_loss: 7.2763e-04 - val_mae: 0.0200
Epoch 4/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 8.1225e-04 - mae: 0.0207 - val_loss: 6.8458e-04 - val_mae: 0.0186
Epoch 5/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 8.0250e-04 - mae: 0.0206 - val_loss: 6.9037e-04 - val_mae: 0.0192
Epoch 6/50
[1m1276/1276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 7.5875e-04 - mae: 0.0196 - val_loss: 6.7474e



✅ Saved model: model/rnn_model.h5
