In [2]:
import pandas as pd

# Read the dated AQI records from the CSV file and preview the first rows.
df = pd.read_csv(filepath_or_buffer="data/data_date.csv")
df.head(n=5)

Unnamed: 0,Date,Country,Status,AQI Value
0,2022-07-21,Albania,Good,14
1,2022-07-21,Algeria,Moderate,65
2,2022-07-21,Andorra,Moderate,55
3,2022-07-21,Angola,Unhealthy for Sensitive Groups,113
4,2022-07-21,Argentina,Moderate,63


In [3]:
import pandas as pd

# Turn the 'Date' strings into timestamps, then order the rows by country and,
# within each country, by date. The per-country windowing further down relies
# on each group already being in chronological order.
df = (
    df
    .assign(Date=pd.to_datetime(df['Date']))
    .sort_values(by=['Country', 'Date'])
)

In [4]:
# The textual 'Status' label is not used as a model input (only 'AQI Value'
# is windowed later), so remove it from the frame.
df = df.drop(labels='Status', axis='columns')

In [5]:
# Swap the country name for the integer code of its pandas category, keeping
# only the numeric 'Country_ID' column as the grouping key.
df = (
    df
    .assign(Country_ID=df['Country'].astype('category').cat.codes)
    .drop(columns=['Country'])
)

In [6]:
from sklearn.preprocessing import MinMaxScaler

# Fit a MinMaxScaler (default settings) on the AQI column and overwrite the
# column with its scaled values. `scaler` is kept so predictions can be mapped
# back to real AQI units later.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# made further down, so test-set statistics leak into the scaling.
scaler = MinMaxScaler().fit(df[['AQI Value']])
df['AQI Value'] = scaler.transform(df[['AQI Value']])

In [7]:
import numpy as np

def create_sequences(data, seq_len):
    """Slice a series into overlapping input windows and next-step targets.

    For every valid start index ``i`` the window ``data[i:i + seq_len]`` is
    paired with the element that immediately follows it, ``data[i + seq_len]``.

    Parameters
    ----------
    data : array-like
        Ordered observations; the first axis is the time axis. Any trailing
        axes are carried through unchanged.
    seq_len : int
        Number of consecutive observations per window. Must be at least 1.

    Returns
    -------
    X : numpy.ndarray
        Windows, shape ``(len(data) - seq_len, seq_len, *data.shape[1:])``.
    y : numpy.ndarray
        Targets, shape ``(len(data) - seq_len, *data.shape[1:])``.
        When ``data`` is too short to yield a single window, both arrays are
        empty but keep these trailing dimensions, so they can still be
        concatenated with results from longer series.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1 (a negative length would otherwise
        wrap around via negative indexing and produce meaningless targets).
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be a positive integer, got {seq_len}")

    data = np.asarray(data)
    n_windows = len(data) - seq_len
    trailing = data.shape[1:]

    if n_windows <= 0:
        # Keep the rank consistent with the non-empty case instead of
        # returning shape-(0,) arrays.
        empty_X = np.empty((0, seq_len) + trailing, dtype=data.dtype)
        empty_y = np.empty((0,) + trailing, dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + seq_len] for start in range(n_windows)])
    # The target for window i is element i + seq_len, i.e. data[seq_len:].
    # Copy so the result never aliases the caller's array.
    y = data[seq_len:].copy()
    return X, y

In [8]:
SEQ_LEN = 7

# Window every country's series on its own so that no sequence mixes readings
# from two different countries. Countries with SEQ_LEN rows or fewer cannot
# yield a single (window, target) pair and are skipped.
windowed = [
    create_sequences(frame[['AQI Value']].values, SEQ_LEN)
    for _, frame in df.groupby('Country_ID')
    if len(frame) > SEQ_LEN
]
X_all = [windows for windows, _ in windowed]
y_all = [targets for _, targets in windowed]

# Stack the per-country results into one array of inputs and one of targets.
X = np.concatenate(X_all)
y = np.concatenate(y_all)

In [9]:
# Positional 80/20 hold-out: the first 80% of windows train the model and the
# remainder are kept for testing.
# NOTE(review): X/y were concatenated country by country, so the test part is
# made of the last country groups rather than the most recent dates.
split = int(len(X) * 0.8)
X_train, X_test = np.split(X, [split])
y_train, y_test = np.split(y, [split])

In [10]:
import torch
import torch.nn as nn

class AQILSTM(nn.Module):
    """Stacked LSTM followed by a linear head that predicts one value.

    Parameters
    ----------
    input_size : int, default 1
        Number of features per time step.
    hidden_size : int, default 64
        Width of the LSTM hidden state (and input width of the linear head).
    num_layers : int, default 2
        Number of stacked LSTM layers.

    The defaults reproduce the original fixed architecture, so ``AQILSTM()``
    builds the same network as before.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=2):
        super().__init__()
        # batch_first=True: inputs/outputs are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a batch of sequences ``(batch, seq_len, input_size)`` to ``(batch, 1)``."""
        out, _ = self.lstm(x)
        # Only the top layer's output at the final time step feeds the head.
        last_step = out[:, -1, :]
        return self.fc(last_step)

In [11]:
# Seed PyTorch's RNG before the network is built so the random weight
# initialisation, and therefore the training curve and test score, is the
# same on every Restart & Run All.
torch.manual_seed(42)

model = AQILSTM()
criterion = nn.MSELoss()  # regression objective on the scaled AQI values
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [14]:
from tqdm import tqdm

EPOCHS = 40

# The training arrays never change between epochs, so convert them to tensors
# once instead of re-copying the whole dataset on every iteration.
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32)

# Nothing inside the loop switches the model out of training mode.
model.train()

# Full-batch training: each epoch is a single optimisation step over the
# entire training set.
for epoch in tqdm(range(EPOCHS)):
    optimizer.zero_grad()

    outputs = model(X_train_t)
    loss = criterion(outputs, y_train_t)

    loss.backward()
    optimizer.step()

    # Report the training loss every fifth epoch.
    if epoch % 5 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

  2%|▎         | 1/40 [00:00<00:16,  2.35it/s]

Epoch 0, Loss: 0.0022


 15%|█▌        | 6/40 [00:02<00:13,  2.45it/s]

Epoch 5, Loss: 0.0019


 28%|██▊       | 11/40 [00:04<00:11,  2.56it/s]

Epoch 10, Loss: 0.0015


 40%|████      | 16/40 [00:09<00:21,  1.12it/s]

Epoch 15, Loss: 0.0011


 52%|█████▎    | 21/40 [00:13<00:17,  1.10it/s]

Epoch 20, Loss: 0.0013


 65%|██████▌   | 26/40 [00:18<00:12,  1.10it/s]

Epoch 25, Loss: 0.0011


 78%|███████▊  | 31/40 [00:22<00:07,  1.13it/s]

Epoch 30, Loss: 0.0011


 90%|█████████ | 36/40 [00:26<00:03,  1.26it/s]

Epoch 35, Loss: 0.0011


100%|██████████| 40/40 [00:30<00:00,  1.33it/s]


In [15]:
# Score the trained network on the held-out windows. Evaluation mode plus
# no_grad means no autograd bookkeeping is done for this pass.
model.eval()
test_inputs = torch.as_tensor(X_test, dtype=torch.float32)
test_targets = torch.as_tensor(y_test, dtype=torch.float32)

with torch.no_grad():
    predictions = model(test_inputs)
    test_loss = criterion(predictions, test_targets)

# The loss is computed on the scaled AQI values, not on raw AQI units.
print("Test MSE:", test_loss.item())

Test MSE: 0.0014138702536001801


In [17]:
def predict_next_aqi(model, recent_aqi_values, scaler, seq_len):
    """Predict the next AQI value from the most recent readings.

    Parameters
    ----------
    model : torch.nn.Module
        Network that maps a ``(1, seq_len, 1)`` float tensor to a ``(1, 1)``
        prediction in scaled units.
    recent_aqi_values : sequence of numbers
        Exactly ``seq_len`` readings in real AQI units.
    scaler : object
        Provides ``transform`` and ``inverse_transform`` on 2-D arrays with a
        single column; used to move between AQI units and model units.
    seq_len : int
        Required number of readings.

    Returns
    -------
    float
        Predicted value, converted back to AQI units.

    Raises
    ------
    ValueError
        If the number of readings differs from ``seq_len``.

    Notes
    -----
    The model is switched to evaluation mode and left in that mode.
    """
    if len(recent_aqi_values) != seq_len:
        raise ValueError(f"Please provide exactly {seq_len} AQI values")

    # Column vector of readings, scaled the same way as the training data.
    recent = np.asarray(recent_aqi_values, dtype=float).reshape(-1, 1)
    recent_scaled = np.asarray(scaler.transform(recent))

    # Build the input on whatever device the model's weights live on, so the
    # function also works for a model that has been moved off the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Tensor layout: [1, seq_len, 1] = (batch, time steps, features).
    input_tensor = torch.as_tensor(
        recent_scaled.reshape(1, seq_len, 1),
        dtype=torch.float32,
        device=device,
    )

    model.eval()
    with torch.no_grad():
        # .cpu() is required before .numpy() when the model runs on another device.
        pred_scaled = model(input_tensor).detach().cpu().numpy()

    # Undo the scaling and return a plain Python float rather than a NumPy scalar.
    predicted_aqi = scaler.inverse_transform(pred_scaled)[0][0]
    return float(predicted_aqi)

In [18]:
recent_aqi = [55, 60, 58, 62, 65, 63, 66]  # last 7 days AQI, oldest first

# Use the same window length the training sequences were built with instead
# of repeating the literal 7, so this call cannot drift out of sync with SEQ_LEN.
predicted_aqi = predict_next_aqi(
    model=model,
    recent_aqi_values=recent_aqi,
    scaler=scaler,
    seq_len=SEQ_LEN,
)

print(f"Predicted AQI for next day: {predicted_aqi:.2f}")


Predicted AQI for next day: 60.51




In [19]:
def aqi_status(aqi):
    """Return the category label for a numeric AQI reading.

    Each band is identified by its inclusive upper bound; the first band whose
    bound is not exceeded wins. Anything above the last bound (or any value
    that compares false against every bound) is reported as "Hazardous".
    """
    bands = (
        (50, "Good"),
        (100, "Moderate"),
        (150, "Unhealthy for Sensitive Groups"),
        (200, "Unhealthy"),
        (300, "Very Unhealthy"),
    )
    for upper_bound, label in bands:
        if aqi <= upper_bound:
            return label
    return "Hazardous"
# Show the category that the predicted AQI value falls into.
status_label = aqi_status(predicted_aqi)
print("AQI Status:", status_label)

AQI Status: Moderate
