In [1]:
import sys
import numpy as np
from pathlib import Path
import pandas as pd
import torch
from torch import nn, optim
sys.path.append(str(Path.cwd().parent))
from helpers import get_device

In [2]:
data_dir = Path(".").resolve(strict=True).parent.parent / "data"
assert data_dir.is_dir()

features_path = data_dir / "london_features.csv"
assert features_path.is_file()

models_dir = Path(".").resolve(strict=True).parent.parent / "models"
model_path = models_dir / "lstm_model.pth"

In [3]:
dataset = pd.read_csv(features_path)
assert not dataset.empty

In [4]:
# extract features and target, and convert them to numpy arrays
target = np.expand_dims(dataset["cnt"].values.astype(np.float32), axis=1)
features = dataset.drop(columns=["cnt"]).values.astype(np.float32)

In [5]:
# create sequences (past 24 hours to predict next)
def create_sequences(X, y, seq_len=24):
    xs, ys = [], []
    for i in range(seq_len, len(X)):
        xs.append(X[i-seq_len:i])  # (seq_len, num_features)
        ys.append(y[i])
    return np.array(xs), np.array(ys)

In [6]:
seq_len = 24
features_seq, target_seq = create_sequences(features, target, seq_len)
num_features = features_seq.shape[2]

num_epochs = 100
eval_epochs = 10
hidden_size = 128
learning_rate = 0.005

torch.manual_seed(0)
device = get_device()

In [7]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=num_features, hidden_size=hidden_size, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(hidden_size, 1)

    def __call__(self, x):
        x, _ = self.lstm(x)
        x = self.dropout(x)
        x = self.linear(x[:, -1, :])
        return x.squeeze(-1)

In [8]:
def time_series_split(X, n_splits=5):
    num_samples = len(X)
    indices = np.arange(num_samples)
    sample_size = num_samples // (n_splits + 1)
    for i in range(1, n_splits + 1):
        train_stop = i * sample_size
        test_stop = min((i + 1) * sample_size, num_samples)
        yield indices[:train_stop], indices[train_stop:test_stop]

In [9]:
model = Model()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()
full_train_idx, full_test_idx = list(time_series_split(features_seq, n_splits=1))[-1]

In [10]:
def train_model():
    model.train()
    X_train = torch.tensor(features_seq[full_train_idx], device=device)  # 3D: (batch, seq_len, features)
    y_train = torch.tensor(target_seq[full_train_idx], device=device).squeeze(-1)
    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    loss.backward()
    optimizer.step()


def evaluate_model():
    model.eval()
    train_losses, test_losses = [], []
    with torch.no_grad():
        for _, (train_idx, test_idx) in enumerate(time_series_split(features_seq, n_splits=5)):
            X_t = torch.tensor(features_seq[train_idx], device=device)
            y_t = torch.tensor(target_seq[train_idx], device=device).squeeze(-1)
            pred_t = model(X_t)
            train_losses.append(float(loss_fn(pred_t, y_t)))

            X_te = torch.tensor(features_seq[test_idx], device=device)
            y_te = torch.tensor(target_seq[test_idx], device=device).squeeze(-1)
            pred_te = model(X_te)
            test_losses.append(float(loss_fn(pred_te, y_te)))

    avg_train_rmse = np.sqrt(np.mean(train_losses))
    avg_test_rmse = np.sqrt(np.mean(test_losses))
    return avg_train_rmse, avg_test_rmse

In [11]:
for epoch in range(num_epochs):
    # train loop
    train_model()
    # eval loop
    if (epoch + 1) % eval_epochs == 0:
        train_loss, test_loss = evaluate_model()
        print(f"epoch {epoch + 1}: train loss {train_loss:.4f}, test loss {test_loss:.4f}")

epoch 10: train loss 0.5281, test loss 0.5285
epoch 20: train loss 0.3758, test loss 0.3695
epoch 30: train loss 0.3081, test loss 0.3178
epoch 40: train loss 0.2797, test loss 0.2875
epoch 50: train loss 0.2655, test loss 0.2719
epoch 60: train loss 0.2554, test loss 0.2636
epoch 70: train loss 0.2462, test loss 0.2569
epoch 80: train loss 0.2374, test loss 0.2492
epoch 90: train loss 0.2415, test loss 0.2562
epoch 100: train loss 0.2323, test loss 0.2462


In [12]:
models_dir.mkdir(exist_ok=True)
torch.save(model.state_dict(), model_path)