In [None]:
# --- STEP 1: IMPORT LIBRARIES ---
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
# --- STEP 2: LOAD YOUR DATA ---
from google.colab import files
import io  # wraps the uploaded bytes so pandas can parse them in memory

# Filename the dataset was uploaded under when this notebook was written.
# It is preferred when present in the upload and is also the on-disk fallback.
DATA_FILENAME = "Kolkata_AQI_Daily - Copy (3).csv"

# files.upload() opens Colab's file picker and returns {filename: file bytes}.
upload = files.upload()

if upload:
    # Parse the bytes that were just uploaded instead of re-reading a
    # hard-coded path, so a file saved under a different name still loads.
    uploaded_name = DATA_FILENAME if DATA_FILENAME in upload else next(iter(upload))
    model_data = pd.read_csv(io.BytesIO(upload[uploaded_name]))
else:
    # Nothing was uploaded: fall back to a copy that is already on disk.
    model_data = pd.read_csv(DATA_FILENAME)

Saving Kolkata_AQI_Daily - Copy (3).csv to Kolkata_AQI_Daily - Copy (3).csv


In [None]:
# --- STEP 3: SPLIT INTO TRAIN AND TEST ---
test_size = 366  # number of trailing rows held out for evaluation
# Everything before the last `test_size` rows trains the models; the last
# `test_size` rows evaluate them. Both pieces are re-indexed from 0.
train_data, test_data = (
    part.reset_index(drop=True)
    for part in (model_data.iloc[:-test_size], model_data.iloc[-test_size:])
)

In [None]:
# --- STEP 4: ENCODER SETUP ---
# Class count shared by the one-hot input width and the softmax output layer.
n_classes = 6
encoder = OneHotEncoder(sparse_output=False)
# The encoder learns its columns from the training rows only.
encoder.fit(np.array(train_data['Category']).reshape(-1, 1))

# Fail fast: the models are built for `n_classes` one-hot columns, so a
# training split with a different number of distinct categories would only
# surface later as a shape error during windowing or fitting.
if len(encoder.categories_[0]) != n_classes:
    raise ValueError(
        f"Expected {n_classes} distinct values in train_data['Category'], "
        f"found {len(encoder.categories_[0])}: {list(encoder.categories_[0])}"
    )

def encode_categories(data):
    """One-hot encode the ``Category`` column of ``data``.

    The column is turned into a single-column 2-D array and passed to the
    module-level ``encoder``; whatever ``encoder.transform`` returns is
    handed back unchanged (one row per row of ``data``).
    """
    category_column = np.array(data['Category']).reshape(-1, 1)
    return encoder.transform(category_column)

# --- STEP 5: SEQUENCE CREATION FUNCTION ---
def create_sequences(data, sequence_length=5, forecast_horizon=1):
    """Cut ``data`` into sliding input windows and their forecast targets.

    Parameters
    ----------
    data : DataFrame with a ``Category`` column.
    sequence_length : int
        Number of consecutive encoded rows in each input window.
    forecast_horizon : int
        How many rows past the end of a window its target lies
        (1 = the row immediately after the window).

    Returns
    -------
    (X, y) : tuple of numpy arrays
        ``X`` stacks the one-hot windows; ``y`` holds the raw (un-encoded)
        ``Category`` value of each window's target row. Both are empty when
        ``data`` is too short to form a single window.
    """
    one_hot = encode_categories(data)
    categories = data['Category']

    # Distance from a window's first row to its target row.
    target_offset = sequence_length + forecast_horizon - 1
    window_starts = range(len(one_hot) - target_offset)

    windows = [one_hot[start:start + sequence_length] for start in window_starts]
    targets = [categories.iloc[start + target_offset] for start in window_starts]
    return np.array(windows), np.array(targets)

# Number of consecutive rows in each input window; the same value is used
# when building training windows, sizing the model input and evaluating.
sequence_length = 5

In [None]:
# --- STEP 6: BUILD AND TRAIN LSTM ---
def build_model(input_shape, units=64):
    """Build and compile a single-layer LSTM classifier.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample without the batch dimension; this
        notebook passes ``(sequence_length, n_classes)``.
    units : int, default 64
        Number of units in the LSTM layer.

    Returns
    -------
    A compiled ``Sequential`` model: LSTM -> Dense softmax over the
    module-level ``n_classes``. It is compiled with Adam and
    ``sparse_categorical_crossentropy``, so training targets must be
    integer class ids rather than one-hot vectors.
    """
    model = Sequential([
        # Declare the input with an explicit Input object: passing
        # `input_shape=` to the first layer makes Keras emit a UserWarning.
        tf.keras.Input(shape=input_shape),
        LSTM(units),
        Dense(n_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Train models for 1-step, 2-step, 3-step

# Seed the Python, NumPy and TensorFlow random generators once so weight
# initialisation and batch shuffling start from the same state on every
# Restart & Run All.
tf.keras.utils.set_random_seed(42)


def _train_for_horizon(forecast_horizon, epochs=10, batch_size=32):
    """Build the training windows for one horizon and fit a fresh model on them.

    Returns ``(X_train, y_train, model)`` so the notebook-level names used by
    later cells can be bound from a single call per horizon.
    """
    X_train, y_train = create_sequences(train_data, sequence_length, forecast_horizon=forecast_horizon)
    model = build_model((sequence_length, n_classes))
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)
    return X_train, y_train, model


# One independent model per forecast horizon.
X_train_1, y_train_1, model_1 = _train_for_horizon(1)
X_train_2, y_train_2, model_2 = _train_for_horizon(2)
X_train_3, y_train_3, model_3 = _train_for_horizon(3)

Epoch 1/10


  super().__init__(**kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.3093 - loss: 1.6719
Epoch 2/10
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6096 - loss: 0.9693
Epoch 3/10
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6011 - loss: 0.9184
Epoch 4/10
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6547 - loss: 0.8595
Epoch 5/10
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6689 - loss: 0.8369
Epoch 6/10
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6867 - loss: 0.8203
Epoch 7/10
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7126 - loss: 0.7909
Epoch 8/10
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6985 - loss: 0.8078
Epoch 9/10
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

<keras.src.callbacks.history.History at 0x798bc7159110>

In [None]:
# --- STEP 7: FORECAST ACCURACY FUNCTION ---
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

def forecast_metrics(model, test_data, sequence_length=5, forecast_horizon=1):
    """Score a trained model on sliding windows cut from ``test_data``.

    Every window holds ``sequence_length`` consecutive one-hot encoded
    ``Category`` rows. Its target is the raw ``Category`` value of the row
    ``forecast_horizon`` steps after the window's last row. All windows are
    predicted in a single batched ``model.predict`` call.

    Parameters
    ----------
    model : object with ``predict(array, verbose=0)`` returning one row of
        class scores per input window.
    test_data : DataFrame with a ``Category`` column.
    sequence_length : int
        Rows per input window.
    forecast_horizon : int
        Distance, in rows, from the end of a window to its target.

    Returns
    -------
    dict
        ``accuracy``, ``precision_macro``, ``recall_macro`` and ``f1_macro``
        as percentages, plus ``per_class`` (the ``classification_report``
        dict, values as fractions). Despite their names, the three
        ``*_macro`` entries are support-weighted averages
        (``average='weighted'``); the keys are kept for existing callers.

    Raises
    ------
    ValueError
        If ``test_data`` has too few rows to form a single window.
    """
    encoded_test = encode_categories(test_data)

    # Distance from a window's first row to its target row.
    target_offset = sequence_length + forecast_horizon - 1
    n_windows = len(encoded_test) - target_offset
    if n_windows <= 0:
        raise ValueError(
            f"test_data has {len(encoded_test)} rows; at least {target_offset + 1} are needed "
            f"for sequence_length={sequence_length} and forecast_horizon={forecast_horizon}"
        )

    # Stack every window into one (n_windows, sequence_length, n_classes)
    # batch so the model is invoked once instead of once per window.
    windows = np.stack(
        [encoded_test[start:start + sequence_length] for start in range(n_windows)]
    ).reshape(n_windows, sequence_length, n_classes)

    true_labels = test_data['Category'].iloc[target_offset:target_offset + n_windows].to_numpy()

    # The highest-scoring output index is compared directly with the raw
    # Category value. NOTE(review): this assumes Category already holds the
    # integer class ids the model was trained on - confirm against the data.
    pred_labels = np.argmax(model.predict(windows, verbose=0), axis=1)

    # Overall metrics
    accuracy = accuracy_score(true_labels, pred_labels) * 100
    precision_macro = precision_score(true_labels, pred_labels, average='weighted', zero_division=0) * 100
    recall_macro = recall_score(true_labels, pred_labels, average='weighted', zero_division=0) * 100
    f1_macro = f1_score(true_labels, pred_labels, average='weighted', zero_division=0) * 100

    # Per-class metrics
    class_report = classification_report(true_labels, pred_labels, output_dict=True, zero_division=0)

    return {
        'accuracy': accuracy,
        'precision_macro': precision_macro,
        'recall_macro': recall_macro,
        'f1_macro': f1_macro,
        'per_class': class_report
    }

def print_metrics(name, metrics, show_per_class=False):
    """Print the summary produced by ``forecast_metrics`` for one horizon.

    Parameters
    ----------
    name : value shown in the heading as ``"<name>-step Forecast:"``.
    metrics : dict
        Must contain ``accuracy``, ``precision_macro``, ``recall_macro`` and
        ``f1_macro`` (percentages). ``per_class`` is read only when
        ``show_per_class`` is true.
    show_per_class : bool, default False
        When true, also print precision / recall / F1 for each class found
        in ``metrics['per_class']``.

    The ``*_macro`` entries are computed with ``average='weighted'``, so they
    are labelled "Weighted" rather than "Weighted Macro", which named two
    different averaging schemes at once.
    """
    print(f"\n{name}-step Forecast:")
    print(f"Accuracy:       {metrics['accuracy']:.2f}%")
    print(f"Weighted Precision: {metrics['precision_macro']:.2f}%")
    print(f"Weighted Recall:    {metrics['recall_macro']:.2f}%")
    print(f"Weighted F1 Score:  {metrics['f1_macro']:.2f}%\n")

    if not show_per_class:
        return

    print("Per-Class Metrics (%):")
    report = metrics['per_class']
    # Keep only the per-class rows: skip the scalar 'accuracy' entry and the
    # aggregate '... avg' rows that the report dict also contains.
    class_keys = sorted(
        key for key, row in report.items()
        if isinstance(row, dict) and not str(key).endswith(' avg')
    )
    for label in class_keys:
        row = report[label]
        print(
            f"Class {label}: Precision={row['precision']*100:.2f}%, "
            f"Recall={row['recall']*100:.2f}%, F1 Score={row['f1-score']*100:.2f}%"
        )

In [None]:
# Score each horizon-specific model on the held-out rows; the model trained
# for an h-step target is evaluated at forecast_horizon=h.
metrics_1, metrics_2, metrics_3 = (
    forecast_metrics(fitted_model, test_data, sequence_length, forecast_horizon=steps_ahead)
    for steps_ahead, fitted_model in enumerate((model_1, model_2, model_3), start=1)
)

# Report the three summaries in order of increasing horizon.
for steps_ahead, horizon_metrics in ((1, metrics_1), (2, metrics_2), (3, metrics_3)):
    print_metrics(steps_ahead, horizon_metrics)


1-step Forecast:
Accuracy:       73.41%
Weighted Macro Precision:73.43%
Weighted Macro Recall:   73.41%
Weighted Macro F1 Score: 73.41%


2-step Forecast:
Accuracy:       65.28%
Weighted Macro Precision:65.33%
Weighted Macro Recall:   65.28%
Weighted Macro F1 Score: 65.16%


3-step Forecast:
Accuracy:       59.33%
Weighted Macro Precision:59.33%
Weighted Macro Recall:   59.33%
Weighted Macro F1 Score: 59.00%

