In [4]:
import random
from copy import deepcopy as dc
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# --- Reproducibility -------------------------------------------------------
# A single seed is pushed into every random number generator this notebook
# touches: Python's `random`, NumPy's legacy global RNG, and torch (CPU plus
# the CUDA seeding call, which is safe to invoke on a CPU-only machine).
SEED = 42
for _apply_seed in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _apply_seed(SEED)
del _apply_seed  # keep the notebook namespace free of the loop helper

# Turn off cuDNN's autotuner and request deterministic kernels so repeated
# runs pick the same algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# --- Compute device --------------------------------------------------------
# Use the first CUDA device when one is visible, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(f"Using device: {device}")

# --- Load the combined PM / AQI table ---------------------------------------
CSV_PATH = "datasets/final/datasets_lagwise/PM_Combined_AQI_2022_2024.csv"

df = pd.read_csv(CSV_PATH)
n_rows_read = len(df)

# The "Date" column is parsed with an explicit day-month-2-digit-year format.
# With errors="coerce", any value that does not match becomes NaT instead of
# raising, so those rows are removed by the dropna below.
df["DATE"] = pd.to_datetime(df["Date"], format="%d-%m-%y", errors="coerce")

# Keep only rows that have a valid date and both measurements, then put them
# in chronological order. Later steps derive targets by shifting rows, which
# is only meaningful when the rows are ordered by date. mergesort is stable,
# so rows sharing a date keep their original relative order.
df = (
    df.dropna(subset=["DATE", "Daily_Mean_PM", "Daily_AQI_Value"])
    .sort_values("DATE", kind="mergesort")
    .reset_index(drop=True)
)

# Surface data loss instead of dropping rows silently.
n_rows_dropped = n_rows_read - len(df)
if n_rows_dropped:
    print(
        f"Dropped {n_rows_dropped} of {n_rows_read} rows with an unparseable "
        "date or a missing PM/AQI value."
    )

# Work on an independent copy restricted to the three columns used downstream.
data = df[["DATE", "Daily_Mean_PM", "Daily_AQI_Value"]].copy()

print("\n--- 1. Original Data Frame (Head) ---")
print(data.head())
print("-" * 50)
print("Shape:", data.shape)

# --- Build the supervised (feature -> future target) table -------------------
# LAG is the forecast horizon measured in rows.
LAG = 1
TARGET_COL = f"AQI_Targeted_Value_LAG_{LAG}"

# shift(-LAG) moves the AQI series up by LAG rows, so each row's target is the
# AQI value found LAG rows later. The final LAG rows have no future value and
# receive NaN.
# NOTE(review): this equals "LAG calendar days ahead" only if `data` is sorted
# by DATE and has no missing days -- confirm against the source CSV.
data[TARGET_COL] = data["Daily_AQI_Value"].shift(-LAG)

# Drop the trailing rows without a target (and any other row containing NaN)
# and renumber the index from zero.
supervised_df = (
    data[["DATE", "Daily_Mean_PM", "Daily_AQI_Value", TARGET_COL]]
    .dropna()
    .reset_index(drop=True)
)

print("\n--- Supervised Data (Head) ---")
print(supervised_df.head())
print("-" * 50)
print("Shape:", supervised_df.shape)

print("\n--- Supervised Data (Tail) ---")
print(supervised_df.tail())

# The file name is derived from LAG so that changing the horizon cannot
# overwrite a file produced for a different lag. For LAG = 1 this is the same
# path as before.
OUTPUT_PATH = f"datasets/final2/datasets_lagwise/LAG{LAG}_PM_Combined_AQI_2022_2024.csv"

# to_csv does not create missing directories; make sure the target folder
# exists so a fresh checkout can run this cell.
Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
supervised_df.to_csv(OUTPUT_PATH, index=False)

print(f"\n Supervised data saved successfully to: {OUTPUT_PATH}")


Using device: cpu

--- 1. Original Data Frame (Head) ---
        DATE  Daily_Mean_PM  Daily_AQI_Value
0 2022-01-01            6.1        34.000000
1 2022-01-02            4.6        26.000000
2 2022-01-03            9.6        52.000000
3 2022-01-04            5.3        29.000000
4 2022-01-05            5.9        32.666667
--------------------------------------------------
Shape: (1094, 3)

--- Supervised Data (Head) ---
        DATE  Daily_Mean_PM  Daily_AQI_Value  AQI_Targeted_Value_LAG_1
0 2022-01-01            6.1        34.000000                 26.000000
1 2022-01-02            4.6        26.000000                 52.000000
2 2022-01-03            9.6        52.000000                 29.000000
3 2022-01-04            5.3        29.000000                 32.666667
4 2022-01-05            5.9        32.666667                 39.000000
--------------------------------------------------
Shape: (1093, 4)

--- Supervised Data (Tail) ---
           DATE  Daily_Mean_PM  Daily_AQI_Value