In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


In [2]:
# Load the GPS fixes and order them by tourist, then by timestamp, so that
# the rows of each tourist are consecutive and in timestamp order.
# NOTE(review): no date parsing is requested here, so the timestamp column is
# presumably compared as text when sorting -- confirm the format sorts
# chronologically. The "Unnamed: 0" column in the preview looks like a saved
# index; verify before relying on it.
gps = (
    pd.read_csv("../data/generated/gps_with_anomalies.csv")
    .sort_values(by=["tourist_id", "timestamp"])
)
gps.head()


Unnamed: 0,tourist_id,timestamp,latitude,longitude,speed_kmph,gps_accuracy_m
0,T000,2026-01-29 03:36:44.384525,26.912427,75.78748,19.36,8.06
1,T000,2026-01-29 03:41:44.384525,26.912017,75.787955,10.06,9.33
2,T000,2026-01-29 03:46:44.384525,26.911663,75.787663,17.32,6.91
3,T000,2026-01-29 03:51:44.384525,26.911785,75.787402,17.46,7.65
4,T000,2026-01-29 03:56:44.384525,26.911409,75.787124,6.56,7.68


In [3]:
# Number of consecutive GPS fixes fed to the model for each prediction.
SEQUENCE_LENGTH = 10

X_sequences, y_targets = [], []

# Build sliding windows per tourist so that no window mixes rows from two
# different tourists.
for _, track in gps.groupby("tourist_id"):
    points = track[["latitude", "longitude"]].to_numpy()
    n_windows = len(points) - SEQUENCE_LENGTH  # <= 0 means the track is too short

    for start in range(n_windows):
        stop = start + SEQUENCE_LENGTH
        # Input: SEQUENCE_LENGTH consecutive (latitude, longitude) rows.
        X_sequences.append(points[start:stop])
        # Target: the row immediately following the window.
        y_targets.append(points[stop])

# NOTE(review): the coordinates are used exactly as read; no scaling is applied
# in this cell. Confirm whether the model is meant to train on raw values.
X_sequences = np.array(X_sequences)
y_targets = np.array(y_targets)

print(X_sequences.shape, y_targets.shape)


(5152, 10, 2) (5152, 2)


In [4]:
# Copy the window arrays into float32 tensors.
X_tensor, y_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_sequences, y_targets)
)

# Pair each input window with its target and serve them in shuffled
# mini-batches of 32 samples.
dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset=dataset, shuffle=True, batch_size=32)


In [5]:
class LSTMModel(nn.Module):
    """LSTM regressor that maps an input sequence to one output vector.

    The sequence is run through an ``nn.LSTM`` created with
    ``batch_first=True``; the LSTM output at the last time step is then
    projected by a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sequence. Defaults to 2,
            the width that used to be hard-coded, so existing calls and the
            parameter names/shapes of the default model are unchanged.
    """

    def __init__(self, input_size=2, hidden_size=32, num_layers=1, output_size=2):
        super().__init__()

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Predict one vector per sequence.

        Args:
            x: Batched input indexed as (batch, time step, feature).

        Returns:
            Tensor with one row of ``output_size`` values per batch element.
        """
        sequence_out, _ = self.lstm(x)
        # Keep only the LSTM output at the final time step of every sequence.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Seed PyTorch's global random number generator before any weights are
# created, so that a fresh "Restart & Run All" starts from the same initial
# weights. Anything else that draws from the global generator afterwards
# (such as shuffling without an explicit generator) becomes repeatable too.
SEED = 42
torch.manual_seed(SEED)

model = LSTMModel()
# Mean squared error between predicted and target values.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [6]:
# Number of full passes over the training loader.
EPOCHS = 10

for epoch in range(EPOCHS):
    # Running sum of the per-batch loss values for this epoch. The printed
    # figure is this sum, not an average, so it scales with the batch count.
    total_loss = 0.0

    for inputs, targets in loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 473850.6187
Epoch 2, Loss: 380979.6458
Epoch 3, Loss: 319186.7870
Epoch 4, Loss: 271125.5609
Epoch 5, Loss: 230906.4077
Epoch 6, Loss: 196587.7926
Epoch 7, Loss: 167006.9758
Epoch 8, Loss: 141340.5524
Epoch 9, Loss: 118983.6719
Epoch 10, Loss: 99487.6476


In [7]:
# Persist only the learned parameters (the state dict), not the whole module.
model_path = Path("../models/lstm_trajectory.pth")
# Create the target folder first so the save also works when ../models does
# not exist yet (e.g. on a fresh checkout).
model_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), model_path)
print("✅ LSTM model saved!")


✅ LSTM model saved!
