<a href="https://colab.research.google.com/github/abrham17/state_space_model/blob/main/state_space_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import normalize
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [None]:
import kagglehub

path = kagglehub.dataset_download("sumanthvrao/daily-climate-time-series-data")

df = pd.read_csv('/kaggle/input/daily-climate-time-series-data/DailyDelhiClimateTest.csv').dropna()
df_train = pd.read_csv('/kaggle/input/daily-climate-time-series-data/DailyDelhiClimateTrain.csv').dropna()

features = ['meantemp', 'humidity', 'wind_speed', 'meanpressure']
df[features] = normalize(df[features], norm='l2')
df_train[features] = normalize(df_train[features], norm='l2')


In [None]:
def split_sequence(df, n_steps=25, out_step=5):
    xs, ys = [], []
    for i in range(len(df) - n_steps - out_step):
        x = df[features].iloc[i:i+n_steps].values
        y = df[features].iloc[i+n_steps:i+n_steps+out_step].values
        xs.append(x)
        ys.append(y)
    return xs, ys

x_seq, y_seq = split_sequence(df_train)

In [None]:
class SequenceDataset(Dataset):
    def __init__(self, xs, ys):
        self.xs = torch.tensor(xs).float()
        self.ys = torch.tensor(ys).float()
    def __len__(self):
        return len(self.xs)
    def __getitem__(self, idx):
        return self.xs[idx], self.ys[idx]

dataset = SequenceDataset(x_seq, y_seq)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


  self.xs = torch.tensor(xs).float()


In [None]:
class ClimateLSTM(nn.Module):
    def __init__(self, input_size=4, hidden_dim=256, output_dim=4, out_step=5):
        super(ClimateLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.out_step = out_step

    def forward(self, x):
        batch_size = x.size(0)
        lstm_out, _ = self.lstm(x)
        # Take the last hidden state and repeat it for each future step
        last_hidden = lstm_out[:, -1, :]
        out = self.fc(last_hidden)  # shape: (batch, output_dim)
        out = out.unsqueeze(1).repeat(1, self.out_step, 1)  # shape: (batch, out_step, output_dim)
        return out

# Instantiate model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ClimateLSTM().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()


In [None]:
def train_model(model, optimizer, dataloader, device, epochs=30):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss / len(dataloader):.4f}")

# Train the model
train_model(model, optimizer, dataloader, device)

Epoch 1/30 - Loss: 0.0494
Epoch 2/30 - Loss: 0.0009
Epoch 3/30 - Loss: 0.0007
Epoch 4/30 - Loss: 0.0007
Epoch 5/30 - Loss: 0.0007
Epoch 6/30 - Loss: 0.0007
Epoch 7/30 - Loss: 0.0007
Epoch 8/30 - Loss: 0.0007
Epoch 9/30 - Loss: 0.0007
Epoch 10/30 - Loss: 0.0007
Epoch 11/30 - Loss: 0.0007
Epoch 12/30 - Loss: 0.0007
Epoch 13/30 - Loss: 0.0007
Epoch 14/30 - Loss: 0.0007
Epoch 15/30 - Loss: 0.0007
Epoch 16/30 - Loss: 0.0007
Epoch 17/30 - Loss: 0.0007
Epoch 18/30 - Loss: 0.0007
Epoch 19/30 - Loss: 0.0007
Epoch 20/30 - Loss: 0.0007
Epoch 21/30 - Loss: 0.0007
Epoch 22/30 - Loss: 0.0007
Epoch 23/30 - Loss: 0.0007
Epoch 24/30 - Loss: 0.0007
Epoch 25/30 - Loss: 0.0007
Epoch 26/30 - Loss: 0.0007
Epoch 27/30 - Loss: 0.0007
Epoch 28/30 - Loss: 0.0007
Epoch 29/30 - Loss: 0.0007
Epoch 30/30 - Loss: 0.0007


In [None]:
last_sequence = df_train[['meantemp','humidity', 'wind_speed','meanpressure']].values[-25:]
last_sequence = torch.tensor(last_sequence).float().unsqueeze(0)

In [None]:
model.eval()
with torch.no_grad():
    predicted = model(last_sequence)

In [None]:
predicted = predicted.squeeze(0).numpy()  # shape: (5, 4)
print("Next 5 predicted timesteps:")
for i, step in enumerate(predicted, 1):
    print(f"Step {i}: Temp={step[0]:.2f}, Humidity={step[1]:.2f}, Wind={step[2]:.2f}, Pressure={step[3]:.2f}")


Next 5 predicted timesteps:
Step 1: Temp=0.02, Humidity=0.07, Wind=0.01, Pressure=0.99
Step 2: Temp=0.02, Humidity=0.07, Wind=0.01, Pressure=0.99
Step 3: Temp=0.02, Humidity=0.07, Wind=0.01, Pressure=0.99
Step 4: Temp=0.02, Humidity=0.07, Wind=0.01, Pressure=0.99
Step 5: Temp=0.02, Humidity=0.07, Wind=0.01, Pressure=0.99


In [None]:
df_train.head()

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2013-01-01,0.009811,0.082906,0.0,0.996509
1,2013-01-02,0.007241,0.090021,0.002916,0.995909
2,2013-01-03,0.00701,0.085093,0.004532,0.996338
3,2013-01-04,0.008499,0.069955,0.00121,0.997513
4,2013-01-05,0.005881,0.085112,0.003627,0.996347
