# Baseline: Classical Autoencoder
A classical benchmark for the quantum enhanced autoencoder, following the structure given in "Applying Quantum Autoencoders for Time Series Anomaly Detection", published 10/10/2024, by Robin Frehner, Kurt Stockinger.

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler
from google.colab import drive

In [2]:
# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Paths to your dataset
train_file_path = '/content/drive/My Drive/UCRArchive_2018/Fish/Fish_TRAIN.tsv'
test_file_path = '/content/drive/My Drive/UCRArchive_2018/Fish/Fish_TEST.tsv'

In [4]:
# Load the data
def load_data(file_path):
    data = pd.read_csv(file_path, sep='\t', header=None)
    data = data.iloc[:, 1:]  # Remove the first useless column
    return data.values

train_data = load_data(train_file_path)
test_data = load_data(test_file_path)

# Normalize the data
scaler = MinMaxScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)


In [5]:
# Preprocess the data into sliding windows
window_size = 128
stride = 1

def create_sliding_windows(data, window_size, stride):
    windows = []
    for i in range(0, data.shape[0] - window_size + 1, stride):
        window = data[i:i + window_size]
        windows.append(window)
    return np.array(windows)

train_windows = create_sliding_windows(train_data, window_size, stride)
test_windows = create_sliding_windows(test_data, window_size, stride)

# Verify dimensions
print("Train windows shape:", train_windows.shape)
print("Test windows shape:", test_windows.shape)

Train windows shape: (48, 128, 463)
Test windows shape: (48, 128, 463)


In [6]:
# PyTorch Dataset and DataLoader
class TimeSeriesDataset(Dataset):
    def __init__(self, data):
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        return self.data[idx]

train_dataset = TimeSeriesDataset(train_windows)
test_dataset = TimeSeriesDataset(test_windows)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [9]:
class Autoencoder(nn.Module):
    def __init__(self, window_size, input_size):
        super(Autoencoder, self).__init__()
        self.encoder_lstm = nn.LSTM(input_size=input_size, hidden_size=128, batch_first=True)
        self.encoder_fc = nn.Sequential(
            nn.ReLU(),
            nn.Linear(128, 10),
            nn.ReLU(),
            nn.Linear(10, 4)
        )
        self.decoder_fc = nn.Sequential(
            nn.Linear(4, 10),
            nn.ReLU(),
            nn.Linear(10, 128),
            nn.ReLU()
        )
        self.decoder_lstm = nn.LSTM(input_size=128, hidden_size=input_size, batch_first=True)

    def forward(self, x):
        batch_size, sequence_length, feature_size = x.size()

        # Encoding
        x, _ = self.encoder_lstm(x)  # (batch_size, sequence_length, hidden_size)
        x = self.encoder_fc(x[:, -1, :])  # (batch_size, latent_dim)

        # Decoding
        x = self.decoder_fc(x)  # (batch_size, hidden_size)
        x = x.unsqueeze(1).repeat(1, sequence_length, 1)  # Repeat for sequence length
        x, _ = self.decoder_lstm(x)  # (batch_size, sequence_length, input_size)

        return x


In [10]:
# Initialize model, loss, and optimizer
input_size = train_windows.shape[2]  # 80 features per time step
model = Autoencoder(window_size=window_size, input_size=input_size)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train_model(model, train_loader, criterion, optimizer, epochs=50):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            outputs = model(batch)
            loss = criterion(outputs, batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

# Train the autoencoder
train_model(model, train_loader, criterion, optimizer)

Epoch 1/50, Loss: 0.2732
Epoch 2/50, Loss: 0.2434
Epoch 3/50, Loss: 0.1621
Epoch 4/50, Loss: 0.1175
Epoch 5/50, Loss: 0.0864
Epoch 6/50, Loss: 0.0579
Epoch 7/50, Loss: 0.0530
Epoch 8/50, Loss: 0.0494
Epoch 9/50, Loss: 0.0450
Epoch 10/50, Loss: 0.0425
Epoch 11/50, Loss: 0.0406
Epoch 12/50, Loss: 0.0391
Epoch 13/50, Loss: 0.0381
Epoch 14/50, Loss: 0.0374
Epoch 15/50, Loss: 0.0366
Epoch 16/50, Loss: 0.0361
Epoch 17/50, Loss: 0.0358
Epoch 18/50, Loss: 0.0355
Epoch 19/50, Loss: 0.0352
Epoch 20/50, Loss: 0.0350
Epoch 21/50, Loss: 0.0348
Epoch 22/50, Loss: 0.0347
Epoch 23/50, Loss: 0.0345
Epoch 24/50, Loss: 0.0343
Epoch 25/50, Loss: 0.0341
Epoch 26/50, Loss: 0.0340
Epoch 27/50, Loss: 0.0339
Epoch 28/50, Loss: 0.0338
Epoch 29/50, Loss: 0.0337
Epoch 30/50, Loss: 0.0336
Epoch 31/50, Loss: 0.0335
Epoch 32/50, Loss: 0.0334
Epoch 33/50, Loss: 0.0332
Epoch 34/50, Loss: 0.0332
Epoch 35/50, Loss: 0.0330
Epoch 36/50, Loss: 0.0330
Epoch 37/50, Loss: 0.0328
Epoch 38/50, Loss: 0.0327
Epoch 39/50, Loss: 0.

In [11]:
# Evaluate the model on test data
def evaluate_model(model, test_loader):
    model.eval()
    reconstruction_losses = []
    with torch.no_grad():
        for batch in test_loader:
            outputs = model(batch)
            loss = criterion(outputs, batch)
            reconstruction_losses.append(loss.item())
    return np.array(reconstruction_losses)

reconstruction_losses = evaluate_model(model, test_loader)

In [12]:
# Define a threshold for anomalies
threshold = np.percentile(reconstruction_losses, 90)

# Identify anomalies
anomalies = reconstruction_losses > threshold

# Save results
results = pd.DataFrame({
    'Reconstruction Loss': reconstruction_losses,
    'Anomaly': anomalies
})
results.to_csv('/content/drive/My Drive/anomaly_results.csv', index=False)

print("Threshold for anomalies:", threshold)
print("Anomalies detected:", np.sum(anomalies))

Threshold for anomalies: 0.035559603944420815
Anomalies detected: 1
