<a href="https://colab.research.google.com/github/abrham17/state_space_model/blob/main/ssm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pathlib import Path

import kagglehub
import pandas as pd
from sklearn.preprocessing import normalize
# Download (or reuse the cached copy of) the Delhi daily-climate dataset.
# `kagglehub.dataset_download` returns the local directory that holds the
# files, so the CSVs are read from there instead of a hard-coded Kaggle-only
# location; this keeps the cell runnable wherever the download lands.
path = kagglehub.dataset_download("sumanthvrao/daily-climate-time-series-data")
data_dir = Path(path)

# Rows containing any missing value are dropped from both splits.
df = pd.read_csv(data_dir / 'DailyDelhiClimateTest.csv').dropna()
df_train = pd.read_csv(data_dir / 'DailyDelhiClimateTrain.csv').dropna()

features = ['meantemp', 'humidity', 'wind_speed', 'meanpressure']
# NOTE(review): sklearn's `normalize` works per sample by default (axis=1),
# i.e. every ROW is rescaled to unit L2 norm across the four features. That is
# not a per-column scaling and cannot be inverted later -- confirm this is the
# intended preprocessing.
df[features] = normalize(df[features], norm='l2')
df_train[features] = normalize(df_train[features], norm='l2')


In [None]:
import pandas as pd
import numpy as np
import sklearn
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

def create_sequences(data, input_len=30, pred_len=1):
    """Slice a time series into sliding (input window, target window) pairs.

    Window ``i`` takes rows ``data[i : i + input_len]`` as the model input and
    the ``pred_len`` rows that immediately follow as the target.

    Args:
        data: Time-ordered values with the time axis first (NumPy array,
            pandas DataFrame/Series, or a list). Converted with ``np.asarray``.
        input_len: Number of consecutive steps in each input window.
        pred_len: Number of steps to predict after each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays, each holding
        ``len(data) - input_len - pred_len + 1`` windows. Both are empty when
        ``data`` is too short to form a single window.
    """
    values = np.asarray(data)
    # The last valid window starts at index len - input_len - pred_len, so the
    # window count is one more than that index. The previous bound stopped one
    # short and silently dropped the final window.
    n_windows = len(values) - input_len - pred_len + 1

    X, y = [], []
    for start in range(n_windows):
        split = start + input_len
        X.append(values[start:split])
        y.append(values[split:split + pred_len])
    return np.array(X), np.array(y)

# Build 30-step input windows with 1-step targets from the selected feature columns.
# NOTE(review): `df` is read from DailyDelhiClimateTest.csv, and `df_train` is
# not used here -- confirm that building the training windows from the test
# split is intended.
# NOTE(review): with four feature columns each window is 2-D; ClimateDataset
# then appends another axis via unsqueeze(-1) while the models default to
# input_dim=1 -- verify that the shapes line up.
X, y = create_sequences(df[features], input_len=30, pred_len=1)


In [None]:
class ClimateDataset(Dataset):
    """Torch dataset pairing input windows with their targets.

    Args:
        X: Array-like of input windows. Stored as a float32 tensor with one
            extra trailing axis appended (``unsqueeze(-1)``).
        y: Array-like of targets, stored as a float32 tensor.

    Raises:
        ValueError: If ``X`` or ``y`` contains values that cannot be converted
            to float (raised by NumPy during coercion).
    """

    def __init__(self, X, y):
        # Coerce through NumPy first: torch.tensor rejects dtype=object arrays
        # even when every element is numeric, whereas np.asarray converts them
        # and raises a clear error when the content really is non-numeric.
        X_values = np.asarray(X, dtype=np.float32)
        y_values = np.asarray(y, dtype=np.float32)
        self.X = torch.tensor(X_values, dtype=torch.float32).unsqueeze(-1)
        self.y = torch.tensor(y_values, dtype=torch.float32)

    def __len__(self):
        """Return the number of stored windows."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap the windowed arrays in a dataset and serve them as shuffled
# mini-batches of 64 samples.
dataset = ClimateDataset(X, y)
dataloader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=64,
)


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

In [None]:
import torch.nn as nn

class LSTMForecast(nn.Module):
    """LSTM encoder followed by a linear head applied to the final time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of values produced per sequence.
    """

    def __init__(self, input_dim=1, hidden_dim=64, output_dim=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a batch-first sequence tensor to one prediction per sequence."""
        sequence_out, _final_state = self.lstm(x)
        # Only the hidden output at the last time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)


In [None]:
class SimpleSSM(nn.Module):
    """Linear state-space recurrence that emits one output from the final state.

    Starting from ``h = 0``, every time step applies
    ``h = h @ A + x_t @ B.T``; after the last step the output is ``h @ C.T``.

    Args:
        input_dim: Number of features per time step.
        state_dim: Size of the hidden state vector.
        output_dim: Number of values produced per sequence.
    """

    def __init__(self, input_dim=1, state_dim=64, output_dim=1):
        super().__init__()
        # All three matrices start as small Gaussian noise (scaled by 0.01).
        self.A = nn.Parameter(torch.randn(state_dim, state_dim) * 0.01)
        self.B = nn.Parameter(torch.randn(state_dim, input_dim) * 0.01)
        self.C = nn.Parameter(torch.randn(output_dim, state_dim) * 0.01)
        # Not read by forward(); kept so existing attribute access keeps working.
        self.state = None

    def forward(self, x):
        """Run the recurrence over a ``(batch, time, input_dim)`` tensor.

        Returns:
            Tensor of shape ``(batch, output_dim)`` computed from the state
            after the last time step.
        """
        Bx = torch.einsum('btd,sd->bts', x, self.B)  # input -> state
        batch_size, seq_len, _ = Bx.shape
        # Allocate the initial state with the same dtype AND device as the
        # projected input. A default-dtype (float32) state makes the matmul
        # below fail as soon as the module runs in another precision.
        h = Bx.new_zeros((batch_size, self.A.size(0)))
        for t in range(seq_len):
            h = torch.matmul(h, self.A) + Bx[:, t, :]
        return torch.matmul(h, self.C.t())


In [None]:
def train_model(model, dataloader, epochs=20, lr=1e-3):
    """Fit ``model`` with Adam on an MSE objective.

    After every epoch the mean of the per-batch losses is printed.

    Args:
        model: Module to optimise in place; it is switched to training mode.
        dataloader: Sized iterable yielding ``(inputs, targets)`` batches.
        epochs: Number of full passes over ``dataloader``.
        lr: Learning rate handed to Adam.

    Returns:
        None. The model's parameters are updated in place.
    """
    model.train()
    loss_fn = nn.MSELoss()
    adam = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch_number in range(1, epochs + 1):
        batch_losses = []
        for inputs, targets in dataloader:
            adam.zero_grad()
            batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            adam.step()
            batch_losses.append(batch_loss.item())
        print(f"Epoch {epoch_number}: Loss={sum(batch_losses) / len(dataloader):.4f}")


In [None]:
def evaluate_model(model, X, y_true, scaler=None):
    """Predict on ``X`` and print RMSE, MAE and R² against ``y_true``.

    Args:
        model: Trained module. It is put in eval mode and called without
            gradient tracking.
        X: Array-like of input windows; a trailing feature axis is appended
            (``unsqueeze(-1)``) before the forward pass.
        y_true: Array-like of targets with one value per window.
        scaler: Optional object exposing ``inverse_transform`` on an ``(n, 1)``
            array. When given, predictions and targets are mapped back to the
            original units before the metrics are computed. When omitted, a
            notebook-level ``scaler`` is used if one exists; otherwise the
            metrics are reported in the model's own (scaled) units.

    Returns:
        1-D NumPy array of predictions (inverse-transformed when a scaler is
        in effect).

    Raises:
        ValueError: If the number of predictions and targets differs.
    """
    if scaler is None:
        # Backward compatibility: earlier calls relied on a module-level
        # `scaler`. Use it when it exists instead of failing with NameError.
        scaler = globals().get("scaler")

    model.eval()
    with torch.no_grad():
        X_tensor = torch.tensor(X, dtype=torch.float32).unsqueeze(-1)
        preds = model(X_tensor).squeeze().numpy()

    # Column vectors are what scaler.inverse_transform expects.
    preds = preds.reshape(-1, 1)
    y_true = np.asarray(y_true).reshape(-1, 1)
    if scaler is not None:
        preds = scaler.inverse_transform(preds)
        y_true = scaler.inverse_transform(y_true)
    preds = np.asarray(preds).flatten()
    y_true = np.asarray(y_true).flatten()

    if preds.shape != y_true.shape:
        raise ValueError(
            f"Got {preds.shape[0]} predictions for {y_true.shape[0]} targets"
        )

    # Metrics are computed with NumPy in float64 so the function does not
    # depend on `sklearn.metrics` having been imported as a side effect.
    targets = y_true.astype(np.float64)
    errors = targets - preds.astype(np.float64)
    ss_res = float(np.sum(errors ** 2))
    ss_tot = float(np.sum((targets - targets.mean()) ** 2))

    rmse = float(np.sqrt(np.mean(errors ** 2)))
    mae = float(np.mean(np.abs(errors)))
    if ss_tot > 0:
        r2 = 1.0 - ss_res / ss_tot
    else:
        # Constant targets: R² is undefined, so report 1.0 for a perfect fit
        # and 0.0 otherwise (same convention as sklearn's default r2_score).
        r2 = 1.0 if ss_res == 0 else 0.0

    print(f"RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")
    return preds

In [None]:
# --- LSTM baseline: fit on the shared dataloader, then report its metrics ---
lstm_model = LSTMForecast()
train_model(model=lstm_model, dataloader=dataloader)
print("\nLSTM Evaluation:")
evaluate_model(model=lstm_model, X=X, y_true=y)

# --- State-space model: same data and training routine for a like-for-like comparison ---
ssm_model = SimpleSSM()
train_model(model=ssm_model, dataloader=dataloader)
print("\nSSM Evaluation:")
evaluate_model(model=ssm_model, X=X, y_true=y)


Epoch 1: Loss=0.2436
Epoch 2: Loss=0.0333
Epoch 3: Loss=0.0218
Epoch 4: Loss=0.0136
Epoch 5: Loss=0.0064
Epoch 6: Loss=0.0056
Epoch 7: Loss=0.0052
Epoch 8: Loss=0.0052
Epoch 9: Loss=0.0050
Epoch 10: Loss=0.0049
Epoch 11: Loss=0.0046
Epoch 12: Loss=0.0047
Epoch 13: Loss=0.0046
Epoch 14: Loss=0.0043
Epoch 15: Loss=0.0042
Epoch 16: Loss=0.0042
Epoch 17: Loss=0.0041
Epoch 18: Loss=0.0040
Epoch 19: Loss=0.0040
Epoch 20: Loss=0.0039

LSTM Evaluation:
RMSE: 2.044, MAE: 1.605, R²: 0.918
Epoch 1: Loss=0.3414
Epoch 2: Loss=0.0603
Epoch 3: Loss=0.0098
Epoch 4: Loss=0.0063
Epoch 5: Loss=0.0060
Epoch 6: Loss=0.0060
Epoch 7: Loss=0.0067
Epoch 8: Loss=0.0060
Epoch 9: Loss=0.0059
Epoch 10: Loss=0.0060
Epoch 11: Loss=0.0056
Epoch 12: Loss=0.0058
Epoch 13: Loss=0.0055
Epoch 14: Loss=0.0054
Epoch 15: Loss=0.0053
Epoch 16: Loss=0.0053
Epoch 17: Loss=0.0052
Epoch 18: Loss=0.0050
Epoch 19: Loss=0.0050
Epoch 20: Loss=0.0050

SSM Evaluation:
RMSE: 2.263, MAE: 1.806, R²: 0.899


array([13.136942, 13.472925, 13.77204 , ..., 17.380085, 17.150118,
       16.826471], dtype=float32)