In [None]:
!pip install python-dotenv
!pip install hopsworks --upgrade --quiet
!pip install numpy==1.23.5
!pip install --upgrade jax jaxlib

from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
from dotenv import load_dotenv
import hopsworks
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler



# Load the Hopsworks API key from the .env file stored on the mounted Drive.
env_path = '/content/drive/MyDrive/AQI_Predictor/.env'
load_dotenv(dotenv_path=env_path)

# Fail early with a readable message. Assigning the result of os.getenv()
# straight into os.environ raises an opaque TypeError when the key is missing
# (for example when Drive is not mounted or the .env file has no such entry).
api_key = os.getenv('HOPSWORKS_API_KEY')
if not api_key:
    raise RuntimeError(
        f"HOPSWORKS_API_KEY is not set; expected it in {env_path} "
        "or in the process environment."
    )
os.environ['HOPSWORKS_API_KEY'] = api_key

# Log in and read version 2 of the feature group into a DataFrame.
project = hopsworks.login(api_key_value=api_key)
fs = project.get_feature_store()
feature_group = fs.get_feature_group("karachi_aqi_features", version=2)
df = feature_group.read()



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'



Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1241247
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.47s) 


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Columns fed to the model as inputs.
feature_cols = [
    'carbon_monoxide', 'cloud_coverage', 'day', 'hour', 'humidity', 'is_weekend',
    'month', 'nitrogen_dioxide', 'ozone', 'pm_ratio', 'pm10', 'pm10_lag1', 'pm10_lag3',
    'pm2_5', 'pm2_5_lag1', 'pm2_5_lag3', 'pm2_5_roll_mean_3', 'pm2_5_roll_std_6',
    'pressure', 'temp_humidity_index', 'temperature', 'temperature_lag1', 'temperature_lag3',
    'temperature_roll_mean_3', 'temperature_roll_std_6', 'weekday', 'wind_deg', 'wind_speed'
]

# One target column per forecast horizon; the column order here is the order
# of the three model outputs.
target_cols = {
    "day1": "target_pm2_5_avg_day1",
    "day2": "target_pm2_5_avg_day2",
    "day3": "target_pm2_5_avg_day3"
}

# Keep only rows where every input and every target is present.
df = df.dropna(subset=feature_cols + list(target_cols.values()))
X = df[feature_cols].values
y = df[list(target_cols.values())].values

# Split BEFORE fitting the scalers. Fitting them on the full dataset lets
# test-set statistics (min/max, mean/std) leak into training and makes the
# test metrics optimistic. The split only depends on the number of rows and
# random_state, so the same rows end up in each partition as before.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize features: fit on the training rows only, then apply to test rows.
x_scaler = MinMaxScaler()
X_train_scaled = x_scaler.fit_transform(X_train_raw)
X_test_scaled = x_scaler.transform(X_test_raw)

# Standardize targets the same way.
y_scaler = StandardScaler()
y_train = y_scaler.fit_transform(y_train_raw)
y_test = y_scaler.transform(y_test_raw)

# Reshape for LSTM [samples, time_steps, features]; each sample is a single
# time step.
X_train = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# Full-dataset arrays kept under their original names for any later cell that
# still refers to them; they are produced with the train-fitted scalers.
X_scaled = x_scaler.transform(X)
y_scaled = y_scaler.transform(y)
X_reshaped = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping
import joblib
import tensorflow as tf

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable from one run of this cell to the next.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Two stacked LSTM layers followed by a small dense head with one output per
# forecast day. The input shape is declared with an explicit Input layer
# rather than `input_shape=` on the first layer, which Keras 3 deprecates for
# Sequential models.
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]
model = Sequential([
    tf.keras.Input(shape=(n_timesteps, n_features)),
    LSTM(128, return_sequences=True),
    Dropout(0.3),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(3),  # 3-day PM2.5 predictions
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen, so the 500-epoch budget is only an upper bound.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    epochs=500,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

# These metrics are computed on the scaled targets, not in PM2.5 units.
test_loss, test_mae = model.evaluate(X_test, y_test)
print("Test MSE:", test_loss)
print("Test MAE:", test_mae)

# Map predictions and ground truth back to the original target units.
y_pred_scaled = model.predict(X_test)
y_pred = y_scaler.inverse_transform(y_pred_scaled)
y_actual = y_scaler.inverse_transform(y_test)

# Show up to five examples; slicing avoids an IndexError on a tiny test set.
for predicted, actual in zip(y_pred[:5], y_actual[:5]):
    print(f"Predicted: {predicted}, Actual: {actual}")

model_path = '/content/drive/MyDrive/AQI_Predictor/LSTM_3days.h5'
model.save(model_path)
print("Model saved in Google Drive succesfully.")

Epoch 1/500




[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.9807 - mae: 0.7274 - val_loss: 1.0555 - val_mae: 0.7652
Epoch 2/500
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 1.0214 - mae: 0.7432 - val_loss: 0.9926 - val_mae: 0.7400
Epoch 3/500
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.8717 - mae: 0.6890 - val_loss: 0.8053 - val_mae: 0.6737
Epoch 4/500
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.7455 - mae: 0.6472 - val_loss: 0.6889 - val_mae: 0.6252
Epoch 5/500
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.6574 - mae: 0.6187 - val_loss: 0.6685 - val_mae: 0.6163
Epoch 6/500
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.6613 - mae: 0.6201 - val_loss: 0.6599 - val_mae: 0.6102
Epoch 7/500
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - lo



Predicted: [30.752802 35.818226 38.42957 ], Actual: [28.26666667 39.72083333 39.07916667]
Predicted: [24.162348 26.847345 27.445335], Actual: [17.55833333 20.1625     31.05833333]
Predicted: [25.803955 24.15629  24.817509], Actual: [30.05833333 34.27916667 23.02083333]
Predicted: [19.298191 21.74514  22.615215], Actual: [18.475      23.04583333 27.40833333]
Predicted: [27.420732 28.282743 28.494852], Actual: [29.4125     24.62083333 27.39583333]
Model saved in Google Drive succesfully.


In [None]:
import tempfile
import os

# Save the scalers to Drive as well (needed during inference to scale inputs
# and to map predictions back to the original target units).
x_scaler_path = "/content/drive/MyDrive/AQI_Predictor/x_scaler.pkl"
y_scaler_path = "/content/drive/MyDrive/AQI_Predictor/y_scaler.pkl"
joblib.dump(x_scaler, x_scaler_path)
joblib.dump(y_scaler, y_scaler_path)

model_registry = project.get_model_registry()
model_dir = tempfile.mkdtemp()

# Everything placed in model_dir is uploaded as the registered model version.
# The trained network itself must be included: with only the scaler pickles in
# the directory, the registry entry would carry no model weights at all.
model.save(os.path.join(model_dir, "LSTM_3days.h5"))
joblib.dump(x_scaler, os.path.join(model_dir, "x_scaler.pkl"))
joblib.dump(y_scaler, os.path.join(model_dir, "y_scaler.pkl"))

# The first value returned by evaluate() is the loss the model was compiled
# with; it is measured on the scaled targets.
registry_mse = float(model.evaluate(X_test, y_test)[0])

# Register model
model_meta = model_registry.python.create_model(
    name="lstm_3day_pm25_predictor",
    metrics={"mse": registry_mse},
    description="LSTM model predicting PM2.5 for next 3 days",
    input_example=X_test[:1]
)

model_meta.save(model_dir)

[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3229 - mae: 0.4292




  0%|          | 0/6 [00:00<?, ?it/s]

Uploading /tmp/tmpz_lyw55k/y_scaler.pkl: 0.000%|          | 0/671 elapsed<00:00 remaining<?

Uploading /tmp/tmpz_lyw55k/x_scaler.pkl: 0.000%|          | 0/1783 elapsed<00:00 remaining<?

Uploading /content/input_example.json: 0.000%|          | 0/513 elapsed<00:00 remaining<?

Model created, explore it at https://c.app.hopsworks.ai:443/p/1241247/models/lstm_3day_pm25_predictor/2


Model(name: 'lstm_3day_pm25_predictor', version: 2)