In [295]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np, pandas as pd, tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib

# Load the pre-processed telemetry table; the location is kept in one named
# constant so it is easy to spot and change.
DATA_CSV = '../data/iot_telemetry_data_processed.csv'
df = pd.read_csv(DATA_CSV)


In [296]:
# Parse the epoch-seconds 'ts' column into 'datetime_col', order the rows by it,
# then expose a copy of the frame that uses the timestamp as its index.
df = (
    df
    .assign(datetime_col=pd.to_datetime(df['ts'], unit='s'))
    .sort_values('datetime_col')
)
df_indexed = df.set_index('datetime_col')

In [None]:
# Pull out the 'pm2_5' column (indexed by timestamp) and report its sample count.
pm2_5_series = df_indexed['pm2_5']
print(pm2_5_series.shape[0])

In [298]:
def plot_series(pm2_5_series):
    """Draw the given series as a line chart on a 12x6 inch figure and show it.

    Parameters
    ----------
    pm2_5_series : pandas.Series
        Series to plot; its index is used for the x-axis.
    """
    _, ax = plt.subplots(figsize=(12, 6))
    pm2_5_series.plot(ax=ax, title='pm2_5 Series')
    plt.show()
    

In [None]:
# Visual sanity check of the full series before any modelling.
plot_series(pm2_5_series=pm2_5_series)

In [300]:
# data preparation
HORIZON    = 12            # steps predicted per window (one hour of 5-minute data)
LOOK_BACK  = 288           # steps fed to the model per window (24 h history)
TRAIN_FRAC = 0.70          # share of windows used for training; keep in sync with the split cell

series = pm2_5_series.sort_index()          # ensure chronological order
values = series.values.reshape(-1, 1).astype("float32")

# Number of (input, label) windows that fit into the series.
n_windows = len(values) - LOOK_BACK - HORIZON + 1
if n_windows < 1:
    raise ValueError(
        f"Need at least {LOOK_BACK + HORIZON} samples to build one window, "
        f"got {len(values)}"
    )

# Fit the scaler only on the raw samples that training windows can touch, so the
# min/max of the validation/test period does not leak into training.
# Training windows 0 .. n_train_windows-1 cover raw samples 0 .. train_end-1.
n_train_windows = max(int(TRAIN_FRAC * n_windows), 1)
train_end = n_train_windows + LOOK_BACK + HORIZON - 1
scaler = MinMaxScaler().fit(values[:train_end])
scaled = scaler.transform(values)           # 0-to-1 on the training span; later data may exceed it

# build windows of shape (N, LOOK_BACK, 1) and labels of shape (N, HORIZON, 1)
# (the trailing axis of the labels is kept because the evaluation cell squeezes it)
flat = scaled[:, 0]
X = np.lib.stride_tricks.sliding_window_view(flat, LOOK_BACK)[:n_windows]
y = np.lib.stride_tricks.sliding_window_view(flat, HORIZON)[LOOK_BACK : LOOK_BACK + n_windows]
X = np.array(X[..., np.newaxis], dtype="float32")   # np.array copies the strided view
y = np.array(y[..., np.newaxis], dtype="float32")

In [301]:
# split: 70 % train, 15 % val, 15 % test (chronological; only the training
# batches are shuffled)
SEED = 42
# Seed Python, NumPy and TensorFlow before any dataset or model is created so
# that shuffling and weight initialisation are repeatable across runs.
tf.keras.utils.set_random_seed(SEED)

n_total = len(X)
n_train = int(0.70 * n_total)           # exclusive end index of the training windows
n_val   = int(0.85 * n_total)           # exclusive end index of the validation windows

if not (0 < n_train < n_val < n_total):
    raise ValueError(
        f"Not enough windows ({n_total}) to form non-empty train/val/test splits"
    )

X_train, y_train = X[:n_train],         y[:n_train]
X_val,   y_val   = X[n_train:n_val],   y[n_train:n_val]
X_test,  y_test  = X[n_val:],          y[n_val:]

batch = 128
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(1000, seed=SEED)
    .batch(batch)
    .prefetch(1)
)
# Validation and test keep their original order so predictions line up with time.
val_ds   = tf.data.Dataset.from_tensor_slices((X_val,   y_val  )).batch(batch).prefetch(1)
test_ds  = tf.data.Dataset.from_tensor_slices((X_test,  y_test )).batch(batch).prefetch(1)

In [302]:
# model definition: a single LSTM layer summarises the LOOK_BACK-step input and
# a dense layer emits all HORIZON forecast steps at once
input_spec = tf.keras.layers.Input(shape=(LOOK_BACK, 1))
lstm_layer = tf.keras.layers.LSTM(128, return_sequences=False)
output_layer = tf.keras.layers.Dense(HORIZON)
model = tf.keras.Sequential([input_spec, lstm_layer, output_layer])

# Train on mean squared error and also report RMSE under the name "rmse".
rmse_metric = tf.keras.metrics.RootMeanSquaredError(name="rmse")
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="mse",
    metrics=[rmse_metric],
)

# Early stopping watches the validation loss with a patience of 10 epochs and
# is asked to restore the best weights.
callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)

In [None]:
# training
# NOTE: the EarlyStopping callback is configured with patience=10, which is
# larger than the 8 epochs requested here, so it cannot end this run early.
history = model.fit(
    train_ds,
    epochs   = 8,
    validation_data = val_ds,
    callbacks = [callback],
    verbose   = 2
)

# Learning curves: loss on the left, RMSE on the right, training vs validation.
curves = history.history
fig, (ax_loss, ax_rmse) = plt.subplots(1, 2, figsize=(12, 4))

ax_loss.plot(curves['loss'], label='Training Loss')
if 'val_loss' in curves:
    ax_loss.plot(curves['val_loss'], label='Validation Loss')
ax_loss.set_title('Model Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

# The metric keys depend on the name given at compile time, so plot only those present.
for key, label in (('rmse', 'Training RMSE'), ('val_rmse', 'Validation RMSE')):
    if key in curves:
        ax_rmse.plot(curves[key], label=label)
ax_rmse.set_title('Model RMSE')
ax_rmse.set_xlabel('Epoch')
ax_rmse.set_ylabel('RMSE')
if ax_rmse.lines:
    ax_rmse.legend()

fig.tight_layout()
plt.show()

In [None]:
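# save or load (optional): uncomment the first two lines to persist the trained
# model and the fitted scaler, or the last line to reload a previously saved
# model instead of retraining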

# model.save("pm2_5_model.keras")
# joblib.dump(scaler, "pm2_5_minmax.pkl") 

# model  = tf.keras.models.load_model("pm2_5_model.keras")


In [None]:
# evaluation
pred_scaled = model.predict(test_ds, verbose=0)              # (N, HORIZON)
# Read the labels back from the same (unshuffled) dataset so they are in the
# same order as the predictions; they may carry a trailing singleton axis.
targ_scaled = np.concatenate([labels.numpy() for _, labels in test_ds], axis=0)

# inverse-scale to original units
def invert(arr):
    """Undo the scaler on an array of any shape and return it in that same shape."""
    arr = np.asarray(arr)
    return scaler.inverse_transform(arr.reshape(-1, 1)).reshape(arr.shape)

preds_mu   = invert(pred_scaled)
# Align targets with the prediction shape, whether or not the labels have a
# trailing axis of length 1.
targets_mu = invert(targ_scaled).reshape(preds_mu.shape)

mae  = mean_absolute_error(targets_mu, preds_mu)
rmse = np.sqrt(mean_squared_error(targets_mu, preds_mu))

print(f"Test MAE  {mae:8.3f}   |   RMSE  {rmse:8.3f}")

In [None]:

def plot_recent_forecasts(
        series,
        preds_mu,
        targets_mu,
        look_back,
        horizon,
        num_windows=5,
        stride=1):
    """Plot actual vs predicted values for the most recent forecast windows.

    One subplot is drawn per window, newest first. The last row of
    ``preds_mu`` / ``targets_mu`` is taken to be the window starting at
    position ``len(series) - look_back - horizon`` of ``series``; each earlier
    panel steps ``stride`` windows further back.

    Parameters
    ----------
    series : pandas.Series
        Series whose index supplies the time axis. Assumed to be in the same
        order as the data the windows were built from (not checked here).
    preds_mu, targets_mu : array-like
        Per-window predictions and targets; each row must squeeze to
        ``horizon`` values.
    look_back : int
        Number of input steps per window.
    horizon : int
        Number of forecast steps per window.
    num_windows : int, default 5
        Maximum number of panels. Reduced silently when fewer windows exist.
    stride : int, default 1
        Distance, in windows, between consecutive panels. With 1, neighbouring
        panels overlap in all but one time step.

    Raises
    ------
    ValueError
        If ``stride`` is below 1 or there is no window to plot.
    """
    if stride < 1:
        raise ValueError("stride must be >= 1")

    base_start = len(series) - look_back - horizon          # start of the newest window
    # Never step back further than the data or the series allows.
    n_available = min(len(preds_mu), len(targets_mu), base_start + 1)
    reachable = (n_available + stride - 1) // stride if n_available > 0 else 0
    num_windows = min(num_windows, reachable)
    if num_windows < 1:
        raise ValueError("no forecast windows available to plot")

    # squeeze=False always returns a 2-D array of axes, even for a single panel.
    fig, axes = plt.subplots(num_windows, 1,
                             figsize=(10, 3*num_windows),
                             sharex=False,
                             squeeze=False)
    axes = axes[:, 0]

    for i, ax in enumerate(axes):
        back      = i * stride                               # windows before the newest
        first     = base_start - back + look_back            # first forecast position
        t_axis    = series.index[first : first + horizon]

        truth = np.asarray(targets_mu[-(back + 1)]).squeeze()   # newest first
        pred  = np.asarray(preds_mu[-(back + 1)]).squeeze()

        ax.plot(t_axis, truth, marker="o", label="actual")
        ax.plot(t_axis, pred , marker="x", label="predicted")
        ax.set_title(f"Window starting {t_axis[0]}")
        ax.set_ylabel("PM2.5 (µg/m³)")
        ax.legend()

    axes[-1].set_xlabel("time")
    plt.tight_layout()
    plt.show()



In [None]:
# Scatter of every forecast step against its true value, with a y = x
# reference line spanning the combined axis range.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(targets_mu.ravel(), preds_mu.ravel(), alpha=0.35)

x_low, x_high = ax.get_xlim()
y_low, y_high = ax.get_ylim()
lims = [min(x_low, y_low), max(x_high, y_high)]
ax.plot(lims, lims, linewidth=1)

ax.set_xlabel("actual  (µg/m³)")
ax.set_ylabel("predicted  (µg/m³)")
ax.set_title("All test-set forecasts")
fig.tight_layout()
plt.show()

In [None]:
# Draw the 50 newest test windows, one panel each (newest first).
plot_recent_forecasts(
    pm2_5_series,
    preds_mu,
    targets_mu,
    look_back=LOOK_BACK,
    horizon=HORIZON,
    num_windows=50,
)


In [None]:
def plot_full_test_forecast(
        series,
        preds_mu,
        look_back,
        horizon):
    """Plot actual values against averaged forecasts over the whole test span.

    The rows of ``preds_mu`` are taken to be the last ``len(preds_mu)`` sliding
    windows of ``series``. Every time step is covered by up to ``horizon``
    overlapping forecasts; these are averaged into one predicted value per step.

    Alignment is positional, so repeated timestamps in the index do not merge
    or duplicate points.

    Parameters
    ----------
    series : pandas.Series
        Full series the windows were cut from. Assumed to be in the same order
        as when the windows were built (not checked here).
    preds_mu : array-like
        Per-window forecasts; each row must flatten to at least ``horizon``
        values. Only the first ``horizon`` values of a row are used.
    look_back : int
        Number of input steps per window.
    horizon : int
        Number of forecast steps per window.

    Raises
    ------
    ValueError
        If ``preds_mu`` is empty, its rows are shorter than ``horizon``, or it
        holds more windows than ``series`` can provide.
    """
    preds_arr = np.asarray(preds_mu, dtype="float64")
    n_windows = len(preds_arr)
    if n_windows == 0:
        raise ValueError("preds_mu is empty; nothing to plot")

    preds_2d = preds_arr.reshape(n_windows, -1)
    if preds_2d.shape[1] < horizon:
        raise ValueError(
            f"each forecast row has {preds_2d.shape[1]} values, expected at least {horizon}"
        )
    preds_2d = preds_2d[:, :horizon]

    n_total_windows = len(series) - look_back - horizon + 1
    start_test_window = n_total_windows - n_windows
    if start_test_window < 0:
        raise ValueError(
            f"preds_mu holds {n_windows} windows but series only allows {n_total_windows}"
        )

    # Forecast step j of local window w lands at offset w + j from the first
    # forecast position; bincount sums and counts the overlapping values.
    first_pos = start_test_window + look_back
    span      = n_windows + horizon - 1
    offsets   = (np.arange(n_windows)[:, None] + np.arange(horizon)[None, :]).ravel()
    sums      = np.bincount(offsets, weights=preds_2d.ravel(), minlength=span)
    counts    = np.bincount(offsets, minlength=span)       # every offset occurs at least once

    truth_series = series.iloc[first_pos : first_pos + span]
    pred_series  = pd.Series(sums / counts, index=truth_series.index)

    plt.figure(figsize=(12, 4))
    plt.plot(truth_series.index, truth_series.values, label="actual", linewidth=1.2)
    plt.plot(pred_series.index , pred_series.values , label="predicted", linewidth=1.0)
    plt.title(f"Full test span: {len(pred_series)} five-minute steps")
    plt.xlabel("time")
    plt.ylabel("PM2.5 (µg/m³)")
    plt.legend()
    plt.tight_layout()
    plt.show()

# Overlay the averaged forecasts on the actual series for the whole test period.
plot_full_test_forecast(
    pm2_5_series,
    preds_mu,
    look_back=LOOK_BACK,
    horizon=HORIZON,
)
