## Autoencoder for Rare Behaviour Detection

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


In [2]:
df = pd.read_csv("../data/generated/tourist_features.csv")
df.head()


Unnamed: 0,tourist_id,avg_speed,speed_variance,total_distance_km,stop_duration_minutes,gps_points_count,avg_area_risk,night_travel_ratio
0,T000,9.38,45.17,3.54,145,120,0.56,0.34
1,T001,11.97,16.21,3.61,0,91,0.56,0.45
2,T002,12.86,17.62,4.98,0,120,0.56,0.34
3,T003,12.42,19.37,5.05,0,120,0.56,0.34
4,T004,16.52,26.1,133.33,0,120,0.56,0.34


In [3]:
X = df.drop(columns=["tourist_id"]).values

X_tensor = torch.tensor(X, dtype=torch.float32)
dataset = TensorDataset(X_tensor)
loader = DataLoader(dataset, batch_size=16, shuffle=True)


In [4]:
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU()
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, input_dim)
        )
        
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

input_dim = X.shape[1]
model = Autoencoder(input_dim)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [5]:
EPOCHS = 20

for epoch in range(EPOCHS):
    total_loss = 0
    
    for batch in loader:
        inputs = batch[0]
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 12642.9893
Epoch 2, Loss: 12282.5479
Epoch 3, Loss: 12680.3013
Epoch 4, Loss: 12875.2988
Epoch 5, Loss: 11390.2483
Epoch 6, Loss: 11333.0117
Epoch 7, Loss: 11263.9320
Epoch 8, Loss: 11182.8335
Epoch 9, Loss: 12159.0037
Epoch 10, Loss: 12085.4690
Epoch 11, Loss: 12278.6758
Epoch 12, Loss: 10650.7561
Epoch 13, Loss: 10455.4319
Epoch 14, Loss: 11302.9761
Epoch 15, Loss: 9953.6665
Epoch 16, Loss: 11696.5739
Epoch 17, Loss: 10252.3540
Epoch 18, Loss: 10575.6954
Epoch 19, Loss: 8131.6005
Epoch 20, Loss: 7873.0049


In [6]:
with torch.no_grad():
    reconstructed = model(X_tensor)
    reconstruction_error = torch.mean((X_tensor - reconstructed) ** 2, dim=1)

df["reconstruction_error"] = reconstruction_error.numpy()

df.head()


Unnamed: 0,tourist_id,avg_speed,speed_variance,total_distance_km,stop_duration_minutes,gps_points_count,avg_area_risk,night_travel_ratio,reconstruction_error
0,T000,9.38,45.17,3.54,145,120,0.56,0.34,4320.536621
1,T001,11.97,16.21,3.61,0,91,0.56,0.45,700.071411
2,T002,12.86,17.62,4.98,0,120,0.56,0.34,1236.528076
3,T003,12.42,19.37,5.05,0,120,0.56,0.34,1227.482178
4,T004,16.52,26.1,133.33,0,120,0.56,0.34,2919.267822


In [7]:
threshold = df["reconstruction_error"].mean() + 2 * df["reconstruction_error"].std()

df["autoencoder_anomaly"] = df["reconstruction_error"] > threshold

df[["tourist_id", "reconstruction_error", "autoencoder_anomaly"]].head()


Unnamed: 0,tourist_id,reconstruction_error,autoencoder_anomaly
0,T000,4320.536621,False
1,T001,700.071411,False
2,T002,1236.528076,False
3,T003,1227.482178,False
4,T004,2919.267822,False


In [8]:
torch.save(model.state_dict(), "../models/autoencoder.pth")
print("✅ Autoencoder saved!")


✅ Autoencoder saved!


Useful for:

Missing tourists

Sudden movement collapse

Strange travel patterns