# Setup Environment and Read Data

In [None]:
import torch
import numpy as np
import pandas as pd
import pickle
import copy
from tqdm import trange,tqdm
import torch.nn as nn
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix

## Setup the Dataset

In [None]:
# Dataset locations, relative to the working directory. The same machine file
# name is used for the train split, the test split and the test labels.
DRIVE = "drive/MyDrive/Colab Notebooks/ELTE/DSLAB/ServerMachineDataset/"
MACHINE = "machine-1-1.txt"
TRAIN_DATASET = f"{DRIVE}train/{MACHINE}"
TEST_DATASET = f"{DRIVE}test/{MACHINE}"
TEST_LABEL_DATASET = f"{DRIVE}test_label/{MACHINE}"

# The files are read without a header row, so columns are labelled 0..N-1.
metric = pd.read_csv(TRAIN_DATASET, header=None)
metric_test = pd.read_csv(TEST_DATASET, header=None)
# Ground-truth labels: column 0 of the label file as a 1-D NumPy array.
true_anomalies = pd.read_csv(TEST_LABEL_DATASET, header=None).loc[:, 0].to_numpy()

In [None]:
# Display the training DataFrame as the cell output.
metric

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,28,29,30,31,32,33,34,35,36,37
0,0.032258,0.039195,0.027871,0.024390,0.0,0.915385,0.343691,0.0,0.020011,0.000122,...,0.0,0.004298,0.029993,0.022131,0.000000,0.000045,0.034677,0.034747,0.0,0.0
1,0.043011,0.048729,0.033445,0.025552,0.0,0.915385,0.344633,0.0,0.019160,0.001722,...,0.0,0.004298,0.030041,0.028821,0.000000,0.000045,0.035763,0.035833,0.0,0.0
2,0.043011,0.034958,0.032330,0.025552,0.0,0.915385,0.344633,0.0,0.020011,0.000122,...,0.0,0.004298,0.026248,0.021101,0.000000,0.000045,0.033012,0.033082,0.0,0.0
3,0.032258,0.028602,0.030100,0.024390,0.0,0.912821,0.342750,0.0,0.021289,0.000000,...,0.0,0.004298,0.030169,0.025733,0.000000,0.000022,0.035112,0.035182,0.0,0.0
4,0.032258,0.019068,0.026756,0.023229,0.0,0.912821,0.342750,0.0,0.018734,0.000000,...,0.0,0.004298,0.027240,0.022645,0.000000,0.000034,0.033447,0.033517,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28474,0.075269,0.046610,0.071349,0.076655,0.0,0.928205,0.269303,0.0,0.031649,0.000244,...,0.0,0.008596,0.068980,0.049408,0.000386,0.000034,0.064504,0.064572,0.0,0.0
28475,0.086022,0.070975,0.075808,0.077816,0.0,0.930769,0.269303,0.0,0.029946,0.000244,...,0.0,0.008596,0.073029,0.055584,0.000386,0.000034,0.067690,0.067757,0.0,0.0
28476,0.086022,0.065678,0.073579,0.076655,0.0,0.935897,0.270245,0.0,0.030372,0.000244,...,0.0,0.008596,0.070516,0.048893,0.000386,0.000034,0.064866,0.064934,0.0,0.0
28477,0.086022,0.056144,0.068004,0.074332,0.0,0.933333,0.271186,0.0,0.032643,0.000244,...,0.0,0.008596,0.070308,0.055069,0.000386,0.000045,0.067111,0.067178,0.0,0.0


## Preprocess the Dataset

### Non-scaled

In [None]:
# Build the train / validation / test DataLoaders.

# Fill missing values in place (interpolate, then back-fill whatever is left
# at the start) and take the underlying NumPy arrays.
metric.interpolate(inplace=True)
metric.bfill(inplace=True)
metric_test.interpolate(inplace=True)
metric_test.bfill(inplace=True)
metric_tensor = metric.values
metric_test_tensor = metric_test.values

# Cut each series into overlapping windows of `sequence_length` rows with
# stride 1: N rows give N - sequence_length + 1 windows.
sequence_length = 30
sequences = [
    metric_tensor[start:start + sequence_length]
    for start in range(metric_tensor.shape[0] - sequence_length + 1)
]
test_sequences = [
    metric_test_tensor[start:start + sequence_length]
    for start in range(metric_test_tensor.shape[0] - sequence_length + 1)
]

# Random 70% / 30% split of the training windows into train and validation.
# NOTE(review): neighbouring windows overlap almost entirely, so a random
# split puts near-duplicate windows on both sides; the validation loss is
# likely optimistic. A chronological split would avoid this.
train_data, val_data = train_test_split(sequences, test_size=0.3, random_state=42)

# Only the training loader shuffles; validation and test keep their order.
batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_sequences, batch_size=batch_size, shuffle=False)

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Sanity check: shape of a single window, (sequence_length, n_features).
sequences[0].shape

(30, 38)

# Define the Networks

In [None]:
# Model sizes shared by every network defined below.
input_dim = 38  # number of features per time step (a window has shape (30, 38))
hidden_dim = 128  # width of the hidden Linear layers / LSTM hidden state
latent_dim = 32  # size of the bottleneck (latent) vector

In [None]:
def save_model(model_name, model, input_dim, latent_dim, hidden_dim,
               save_dir='drive/MyDrive/Colab Notebooks/ELTE/DSLAB'):
    """Write a model checkpoint to ``<save_dir>/<model_name>.pth``.

    The checkpoint is a dict holding the three size hyper-parameters next to
    the model's ``state_dict`` so the network can be rebuilt before loading
    the weights.

    Args:
        model_name: File name of the checkpoint, without the ``.pth`` suffix.
        model: Module whose ``state_dict()`` is stored.
        input_dim: Stored under the ``'input_dim'`` key.
        latent_dim: Stored under the ``'latent_dim'`` key.
        hidden_dim: Stored under the ``'hidden_dim'`` key.
        save_dir: Directory that receives the file. Defaults to the notebook's
            original hard-coded folder, so existing calls are unaffected.

    Returns:
        The path the checkpoint was written to.
    """
    import os  # local import: keeps this cell self-contained

    checkpoint = {
        'input_dim': input_dim,
        'latent_dim': latent_dim,
        'hidden_dim': hidden_dim,
        'state_dict': model.state_dict(),
    }
    checkpoint_path = os.path.join(save_dir, f'{model_name}.pth')
    torch.save(checkpoint, checkpoint_path)
    return checkpoint_path

## AutoEncoder

In [None]:
class FeedforwardEncoder(nn.Module):
    """Two-layer MLP that compresses a whole window into a latent vector.

    The input window is flattened to ``input_dim * sequence_length`` values,
    passed through Linear -> ReLU -> Linear and returned with ``latent_dim``
    entries per sample.
    """

    def __init__(self, input_dim, sequence_length, hidden_dim, latent_dim):
        super().__init__()
        # Number of values in one flattened window.
        self.flatten_dim = input_dim * sequence_length
        layers = [
            nn.Linear(self.flatten_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        ]
        self.encoder = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten everything after the batch axis and encode it."""
        batch_size = x.shape[0]
        flat = x.view(batch_size, -1)  # (batch_size, sequence_length * input_dim)
        return self.encoder(flat)

class FeedforwardDecoder(nn.Module):
    """Two-layer MLP that expands a latent vector back into a full window.

    Linear -> ReLU -> Linear produces ``output_dim * sequence_length`` values
    per sample, which are reshaped to ``(batch, sequence_length, output_dim)``.
    """

    def __init__(self, latent_dim, hidden_dim, output_dim, sequence_length):
        super().__init__()
        self.output_dim = output_dim
        self.sequence_length = sequence_length
        layers = [
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim * sequence_length),
        ]
        self.decoder = nn.Sequential(*layers)

    def forward(self, z):
        """Decode ``z`` and restore the (time, feature) layout."""
        flat_recon = self.decoder(z)
        batch_size = z.shape[0]
        return flat_recon.view(batch_size, self.sequence_length, self.output_dim)

class AE(nn.Module):
    """Feed-forward autoencoder: FeedforwardEncoder followed by FeedforwardDecoder.

    Both halves are moved to ``device`` on construction; the decoder's output
    width is ``input_dim`` so the reconstruction has the input's layout.
    """

    def __init__(self, input_dim, sequence_length, hidden_dim, latent_dim, device='cpu'):
        super().__init__()
        encoder = FeedforwardEncoder(input_dim, sequence_length, hidden_dim, latent_dim)
        self.encoder = encoder.to(device)
        decoder = FeedforwardDecoder(latent_dim, hidden_dim, input_dim, sequence_length)
        self.decoder = decoder.to(device)

    def forward(self, x):
        """Return the reconstruction of ``x``."""
        return self.decoder(self.encoder(x))

In [None]:
def loss_function_ae(x, x_hat):
    """Reconstruction loss: squared error between ``x_hat`` and ``x``, summed over all elements."""
    squared_error_sum = nn.functional.mse_loss(input=x_hat, target=x, reduction='sum')
    return squared_error_sum

In [None]:
# Feed-forward autoencoder with its optimizer and learning-rate schedule.
model_ae = AE(
    input_dim=input_dim,
    sequence_length=sequence_length,
    hidden_dim=hidden_dim,
    latent_dim=latent_dim,
    device=device,
).to(device)
optimizer_ae = Adam(params=model_ae.parameters(), lr=1e-3)
# Multiply the learning rate by 0.1 when the metric passed to step() has not
# improved for more than 5 epochs.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
# against the installed version.
scheduler_ae = ReduceLROnPlateau(optimizer_ae, mode='min', factor=0.1, patience=5, verbose=True)



## LSTM AutoEncoder

In [None]:
class LSTMEncoder(nn.Module):
    """LSTM encoder: summarises a sequence as one latent vector.

    The final hidden state of the top LSTM layer is projected to
    ``latent_dim`` by a Linear layer.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, latent_dim)

    def forward(self, x):
        """Encode a batch-first sequence tensor into latent vectors."""
        _, final_state = self.lstm(x)
        final_hidden = final_state[0]  # h_n; the cell state is not used
        top_layer_hidden = final_hidden[-1]
        return self.fc(top_layer_hidden)

class LSTMDecoder(nn.Module):
    """LSTM decoder: unrolls one latent vector into a whole sequence.

    The latent vector is projected to ``hidden_dim``, copied to every one of
    the ``sequence_length`` time steps, run through an LSTM and mapped to
    ``output_dim`` features per step.
    """

    def __init__(self, latent_dim, hidden_dim, output_dim, sequence_length, num_layers=1):
        super().__init__()
        self.sequence_length = sequence_length
        self.latent_to_hidden = nn.Linear(latent_dim, hidden_dim)
        self.lstm = nn.LSTM(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, z):
        """Decode latent vectors into ``(batch, sequence_length, output_dim)``."""
        projected = self.latent_to_hidden(z)
        # The same projected vector is the LSTM input at every time step.
        lstm_input = projected.unsqueeze(1).repeat(1, self.sequence_length, 1)
        lstm_output, _ = self.lstm(lstm_input)
        return self.output_layer(lstm_output)


class LSTMAE(nn.Module):
    """LSTM autoencoder: LSTMEncoder followed by LSTMDecoder.

    Both halves are moved to ``device`` on construction and share
    ``hidden_dim`` and ``num_layers``.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, sequence_length, num_layers=1, device='cpu'):
        super().__init__()
        encoder = LSTMEncoder(input_dim, hidden_dim, latent_dim, num_layers)
        self.encoder = encoder.to(device)
        decoder = LSTMDecoder(latent_dim, hidden_dim, input_dim, sequence_length, num_layers)
        self.decoder = decoder.to(device)

    def forward(self, x):
        """Return the reconstruction of ``x``."""
        return self.decoder(self.encoder(x))

In [None]:
def loss_function_lstm_ae(x, x_hat):
    """Reconstruction loss: squared error between ``x_hat`` and ``x``, summed over all elements."""
    squared_error_sum = nn.functional.mse_loss(input=x_hat, target=x, reduction='sum')
    return squared_error_sum

In [None]:
# Single-layer LSTM autoencoder with its optimizer and learning-rate schedule.
model_lstm_ae = LSTMAE(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    latent_dim=latent_dim,
    sequence_length=sequence_length,
    num_layers=1,
    device=device,
).to(device)
optimizer_lstm_ae = Adam(params=model_lstm_ae.parameters(), lr=1e-3)
# Multiply the learning rate by 0.1 when the metric passed to step() has not
# improved for more than 5 epochs.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
# against the installed version.
scheduler_lstm_ae = ReduceLROnPlateau(optimizer_lstm_ae, mode='min', factor=0.1, patience=5, verbose=True)

## LSTM Variational AutoEncoder

In [None]:
class LSTMEncoder(nn.Module):
    """Variational LSTM encoder: returns the latent mean and log-variance.

    The final hidden state of the top LSTM layer feeds two separate Linear
    heads, one for the mean and one for the log-variance.

    Note: this reuses the name ``LSTMEncoder`` of the deterministic encoder
    defined earlier in the file, so it replaces that binding from here on.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc_mean = nn.Linear(hidden_dim, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim, latent_dim)

    def forward(self, x):
        """Return ``(mean, logvar)`` for a batch-first sequence tensor."""
        _, final_state = self.lstm(x)
        final_hidden = final_state[0]  # h_n: (num_layers, batch, hidden_dim)
        top_layer_hidden = final_hidden[-1]
        mean = self.fc_mean(top_layer_hidden)
        logvar = self.fc_logvar(top_layer_hidden)
        return mean, logvar


class LSTMDecoder(nn.Module):
    """LSTM decoder used by the variational model.

    The latent sample is projected to ``hidden_dim``, copied to each of the
    ``sequence_length`` time steps, run through an LSTM and mapped to
    ``output_dim`` features per step.

    Note: this reuses the name ``LSTMDecoder`` of the decoder defined earlier
    in the file, so it replaces that binding from here on.
    """

    def __init__(self, latent_dim, hidden_dim, output_dim, sequence_length, num_layers=1):
        super().__init__()
        self.sequence_length = sequence_length
        self.latent_to_hidden = nn.Linear(latent_dim, hidden_dim)
        self.lstm = nn.LSTM(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, z):
        """Decode latent vectors into ``(batch, sequence_length, output_dim)``."""
        projected = self.latent_to_hidden(z)
        # Feed the same projected latent vector at every time step.
        lstm_input = projected.unsqueeze(1).repeat(1, self.sequence_length, 1)
        lstm_output, _ = self.lstm(lstm_input)
        return self.output_layer(lstm_output)


class LSTMVAE(nn.Module):
    """LSTM variational autoencoder.

    The encoder yields a mean and a log-variance, a latent sample is drawn
    with the reparameterisation trick, and the decoder reconstructs the
    sequence from that sample. Both halves are moved to ``device``.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, sequence_length, num_layers=1, device='cpu'):
        super().__init__()
        encoder = LSTMEncoder(input_dim, hidden_dim, latent_dim, num_layers)
        self.encoder = encoder.to(device)
        decoder = LSTMDecoder(latent_dim, hidden_dim, input_dim, sequence_length, num_layers)
        self.decoder = decoder.to(device)

    def reparameterize(self, mean, logvar):
        """Draw ``mean + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(logvar / 2)``."""
        sigma = (0.5 * logvar).exp()
        noise = torch.randn_like(sigma)
        return mean + noise * sigma

    def forward(self, x):
        """Return ``(reconstruction, mean, logvar)`` for the batch ``x``."""
        mean, logvar = self.encoder(x)
        latent_sample = self.reparameterize(mean, logvar)
        return self.decoder(latent_sample), mean, logvar

In [None]:
def loss_function_lstm_vae(x, x_hat, mean, log_var):
    """VAE loss: summed squared reconstruction error plus the KL term.

    The KL term is ``-0.5 * sum(1 + log_var - mean^2 - exp(log_var))``,
    summed over every element of ``mean`` / ``log_var``.
    """
    kl_elements = 1 + log_var - mean.pow(2) - log_var.exp()
    kl_divergence = -0.5 * torch.sum(kl_elements)
    reconstruction_error = nn.functional.mse_loss(x_hat, x, reduction='sum')
    return reconstruction_error + kl_divergence

In [None]:
# Single-layer LSTM variational autoencoder with its optimizer and schedule.
model_lstm_vae = LSTMVAE(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    latent_dim=latent_dim,
    sequence_length=sequence_length,
    num_layers=1,
    device=device,
).to(device)
optimizer_lstm_vae = Adam(params=model_lstm_vae.parameters(), lr=1e-3)
# Multiply the learning rate by 0.1 when the metric passed to step() has not
# improved for more than 5 epochs.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
# against the installed version.
scheduler_lstm_vae = ReduceLROnPlateau(optimizer_lstm_vae, mode='min', factor=0.1, patience=5, verbose=True)

# Train

In [None]:
def _compute_batch_loss(model, batch, loss_fn, variational, device):
    """Move one batch to ``device``, run ``model`` on it and return the loss tensor."""
    # torch.as_tensor converts without the "To copy construct from a tensor"
    # UserWarning that torch.tensor() raises when the batch is already a tensor.
    batch = torch.as_tensor(batch, dtype=torch.float32).to(device)
    if variational:
        recon_batch, mean, logvar = model(batch)
        # Argument order follows the loss signature (x, x_hat, mean, log_var).
        return loss_fn(batch, recon_batch, mean, logvar)
    return loss_fn(batch, model(batch))


def train_model(model, train_loader, val_loader, optimizer, loss_fn, scheduler, variational=False, num_epochs=10, device='cpu', early_stop_tolerant=10):
    """Train ``model`` with validation-based LR scheduling and early stopping.

    After every epoch the validation loss is passed to ``scheduler.step``; the
    weights with the lowest validation loss are kept and loaded back into
    ``model`` before returning.

    Args:
        model: Module to train. Returns the reconstruction, or the tuple
            ``(reconstruction, mean, logvar)`` when ``variational`` is true.
        train_loader: Sized iterable of training batches.
        val_loader: Sized iterable of validation batches.
        optimizer: Optimizer over ``model``'s parameters.
        loss_fn: Called as ``loss_fn(x, x_hat)`` or, for variational models,
            ``loss_fn(x, x_hat, mean, logvar)``.
        scheduler: Scheduler whose ``step`` accepts the validation loss.
        variational: Whether ``model`` returns ``(x_hat, mean, logvar)``.
        num_epochs: Maximum number of epochs.
        device: Device the batches are moved to.
        early_stop_tolerant: Number of consecutive epochs without a new best
            validation loss after which training stops.

    Returns:
        ``(train_losses, val_losses)``: one value per completed epoch, each
        being the summed batch losses divided by the number of batches.
    """
    torch.cuda.empty_cache()
    train_losses = []
    val_losses = []

    early_stop_tolerant_count = 0
    best_loss = float('inf')
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        # Training pass
        model.train()
        train_loss = 0.0
        for batch in train_loader:
            optimizer.zero_grad()
            loss = _compute_batch_loss(model, batch, loss_fn, variational, device)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss /= max(len(train_loader), 1)
        train_losses.append(train_loss)

        # Validation pass
        model.eval()
        valid_loss = 0.0
        with torch.no_grad():
            for batch in val_loader:
                loss = _compute_batch_loss(model, batch, loss_fn, variational, device)
                valid_loss += loss.item()

        valid_loss /= max(len(val_loader), 1)
        val_losses.append(valid_loss)

        scheduler.step(valid_loss)

        if valid_loss < best_loss:
            # New best: snapshot the weights and reset the patience counter.
            best_loss = valid_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            early_stop_tolerant_count = 0
        else:
            early_stop_tolerant_count += 1

        print(f"Epoch {epoch+1:04d}: train loss {train_loss:.4f}, valid loss {valid_loss:.4f}")

        if early_stop_tolerant_count >= early_stop_tolerant:
            print("Early stopping triggered.")
            break

    # Restore the best weights seen on the validation set.
    model.load_state_dict(best_model_wts)
    print("Finished Training.")
    return train_losses, val_losses

## AutoEncoder

In [None]:
# Fit the feed-forward autoencoder for at most 100 epochs.
train_losses_ae, val_losses_ae = train_model(
    model=model_ae,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_ae,
    loss_fn=loss_function_ae,
    scheduler=scheduler_ae,
    variational=False,
    num_epochs=100,
    device=device,
)

  batch = torch.tensor(batch, dtype=torch.float32).to(device)
  batch = torch.tensor(batch, dtype=torch.float32).to(device)


Epoch 0001: train loss 53.8265, valid loss 9.6677
Epoch 0002: train loss 8.5551, valid loss 8.4994
Epoch 0003: train loss 7.6650, valid loss 7.5286
Epoch 0004: train loss 6.6135, valid loss 6.7522
Epoch 0005: train loss 6.3623, valid loss 6.6303
Epoch 0006: train loss 6.2333, valid loss 6.5250
Epoch 0007: train loss 6.0628, valid loss 6.3578
Epoch 0008: train loss 5.8341, valid loss 6.2207
Epoch 0009: train loss 5.6077, valid loss 5.8243
Epoch 0010: train loss 5.3126, valid loss 5.5867
Epoch 0011: train loss 5.0831, valid loss 5.3053
Epoch 0012: train loss 4.8848, valid loss 5.1182
Epoch 0013: train loss 4.7216, valid loss 4.8789
Epoch 0014: train loss 4.5616, valid loss 4.7971
Epoch 0015: train loss 4.4782, valid loss 4.6007
Epoch 0016: train loss 4.3787, valid loss 4.4958
Epoch 0017: train loss 4.2963, valid loss 4.4564
Epoch 0018: train loss 4.2312, valid loss 4.3950
Epoch 0019: train loss 4.1390, valid loss 4.3058
Epoch 0020: train loss 4.0658, valid loss 4.2620
Epoch 0021: train l

## LSTM AutoEncoder

In [None]:
# Fit the LSTM autoencoder for at most 100 epochs.
train_losses_lstm_ae, val_losses_lstm_ae = train_model(
    model=model_lstm_ae,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_lstm_ae,
    loss_fn=loss_function_lstm_ae,
    scheduler=scheduler_lstm_ae,
    variational=False,
    num_epochs=100,
    device=device,
)

  batch = torch.tensor(batch, dtype=torch.float32).to(device)
  batch = torch.tensor(batch, dtype=torch.float32).to(device)


Epoch 0001: train loss 39.3473, valid loss 14.0262
Epoch 0002: train loss 10.7073, valid loss 9.7964
Epoch 0003: train loss 8.8445, valid loss 9.2932
Epoch 0004: train loss 8.4269, valid loss 8.6735
Epoch 0005: train loss 7.8551, valid loss 8.0608
Epoch 0006: train loss 7.5710, valid loss 7.9039
Epoch 0007: train loss 7.4262, valid loss 7.8936
Epoch 0008: train loss 7.3886, valid loss 7.8245
Epoch 0009: train loss 7.4033, valid loss 7.7002
Epoch 0010: train loss 7.3043, valid loss 7.5911
Epoch 0011: train loss 7.1531, valid loss 7.6514
Epoch 0012: train loss 6.4770, valid loss 6.8016
Epoch 0013: train loss 6.3444, valid loss 6.5142
Epoch 0014: train loss 6.2995, valid loss 6.3004
Epoch 0015: train loss 6.2720, valid loss 6.2892
Epoch 0016: train loss 6.1940, valid loss 6.4522
Epoch 0017: train loss 6.1870, valid loss 6.2083
Epoch 0018: train loss 6.1001, valid loss 6.3646
Epoch 0019: train loss 6.0902, valid loss 6.0575
Epoch 0020: train loss 6.0218, valid loss 6.3165
Epoch 0021: train

## LSTM Variational AutoEncoder

In [None]:
# Fit the LSTM variational autoencoder for at most 100 epochs.
train_losses_lstm_vae, val_losses_lstm_vae = train_model(
    model=model_lstm_vae,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_lstm_vae,
    loss_fn=loss_function_lstm_vae,
    scheduler=scheduler_lstm_vae,
    variational=True,
    num_epochs=100,
    device=device,
)

  batch = torch.tensor(batch, dtype=torch.float32).to(device)
  batch = torch.tensor(batch, dtype=torch.float32).to(device)


Epoch 0001: train loss 111.0180, valid loss 69.1832
Epoch 0002: train loss 65.4164, valid loss 64.8201
Epoch 0003: train loss 63.8878, valid loss 64.0853
Epoch 0004: train loss 63.6279, valid loss 62.9051
Epoch 0005: train loss 63.3731, valid loss 63.9493
Epoch 0006: train loss 63.6658, valid loss 62.9409
Epoch 0007: train loss 62.6779, valid loss 63.1970
Epoch 0008: train loss 62.4767, valid loss 64.7848
Epoch 0009: train loss 62.5802, valid loss 62.3761
Epoch 0010: train loss 62.2796, valid loss 63.7612
Epoch 0011: train loss 62.2403, valid loss 63.3894
Epoch 0012: train loss 62.0063, valid loss 62.4541
Epoch 0013: train loss 62.1138, valid loss 62.4695
Epoch 0014: train loss 62.0534, valid loss 62.6712
Epoch 0015: train loss 61.9204, valid loss 63.2243
Epoch 0016: train loss 60.9410, valid loss 61.2454
Epoch 0017: train loss 60.5379, valid loss 61.3935
Epoch 0018: train loss 60.5722, valid loss 61.5015
Epoch 0019: train loss 60.9329, valid loss 61.4786
Epoch 0020: train loss 60.8432

# Evaluation

In [None]:
def evaluate_model(model, variational, test_loader, device, loss_fn, percentile_threshold=90):
    """Score every test sequence and flag those above a percentile threshold.

    Each sequence is passed through ``model`` on its own (batch of one) so that
    ``loss_fn`` yields one anomaly score per sequence. The threshold is the
    ``percentile_threshold``-th percentile of those scores.

    Args:
        model: Trained module. Returns the reconstruction, or the tuple
            ``(reconstruction, mean, logvar)`` when ``variational`` is true.
        variational: Whether ``model`` returns ``(x_hat, mean, logvar)``.
        test_loader: Iterable of batches of sequences.
        device: Device the batches are moved to.
        loss_fn: Called as ``loss_fn(x, x_hat)`` or, for variational models,
            ``loss_fn(x, x_hat, mean, logvar)``.
        percentile_threshold: Percentile (0-100) of the scores used as cut-off.

    Returns:
        List of positions (in iteration order over ``test_loader``) of the
        sequences whose score is strictly above the threshold. Empty when
        ``test_loader`` yields no sequences.
    """
    model.eval()
    anomaly_scores = []

    with torch.no_grad():
        for batch in test_loader:
            # torch.as_tensor avoids the copy-construct UserWarning that
            # torch.tensor() raises when the batch is already a tensor.
            batch = torch.as_tensor(batch, dtype=torch.float32).to(device)

            for sequence in batch:
                sequence = sequence.unsqueeze(0)  # add a batch axis of size 1
                if variational:
                    recon_sequence, mean, logvar = model(sequence)
                    # Argument order follows the loss signature (x, x_hat, mean, log_var).
                    loss = loss_fn(sequence, recon_sequence, mean, logvar)
                else:
                    loss = loss_fn(sequence, model(sequence))
                anomaly_scores.append(loss.item())

    # Nothing to threshold: return early instead of taking a percentile of
    # an empty list.
    if not anomaly_scores:
        return []

    threshold = np.percentile(anomaly_scores, percentile_threshold)
    return [index for index, score in enumerate(anomaly_scores) if score > threshold]

In [None]:
def calculate_f1_score(anomaly_indices, true_anomalies):
    """Turn anomaly positions into a 0/1 label array and score it with F1.

    Args:
        anomaly_indices: Positions predicted as anomalous.
        true_anomalies: Ground-truth label array; the prediction array takes
            its shape and dtype from it.

    Returns:
        ``(f1, predicted_anomalies)``: the F1 score and the binary prediction
        array.
    """
    predicted_anomalies = np.zeros_like(true_anomalies)
    upper_bound = len(predicted_anomalies)
    # Positions at or beyond the end of the label array are dropped.
    in_range = [position for position in anomaly_indices if position < upper_bound]
    if in_range:
        predicted_anomalies[in_range] = 1

    return f1_score(true_anomalies, predicted_anomalies), predicted_anomalies

## AutoEncoder

In [None]:
# Flag test windows whose AE reconstruction loss is above the 90th percentile.
anomalies_ae = evaluate_model(
    model=model_ae,
    variational=False,
    test_loader=test_loader,
    device=device,
    loss_fn=loss_function_ae,
    percentile_threshold=90,
)

  batch = torch.tensor(batch, dtype=torch.float32).to(device)


In [None]:
# F1 of the AE predictions against the ground-truth labels.
f1_ae, predicted_anomalies_ae = calculate_f1_score(
    anomaly_indices=anomalies_ae,
    true_anomalies=true_anomalies,
)
print(f"F1 Score: {f1_ae}")

F1 Score: 0.5784437624119877


In [None]:
# Per-class precision / recall / F1 for the AE predictions.
print(classification_report(true_anomalies, predicted_anomalies_ae))

              precision    recall  f1-score   support

           0       0.96      0.95      0.95     25785
           1       0.56      0.59      0.58      2694

    accuracy                           0.92     28479
   macro avg       0.76      0.77      0.77     28479
weighted avg       0.92      0.92      0.92     28479



In [None]:
# Confusion matrix for the AE: rows are true classes, columns are predicted
# classes (scikit-learn convention).
print(confusion_matrix(true_anomalies, predicted_anomalies_ae))

[[24542  1243]
 [ 1092  1602]]


## LSTM AutoEncoder

In [None]:
# Flag test windows whose LSTM-AE reconstruction loss is above the 90th percentile.
anomalies_lstm_ae = evaluate_model(
    model=model_lstm_ae,
    variational=False,
    test_loader=test_loader,
    device=device,
    loss_fn=loss_function_lstm_ae,
    percentile_threshold=90,
)

  batch = torch.tensor(batch, dtype=torch.float32).to(device)


In [None]:
# F1 of the LSTM-AE predictions against the ground-truth labels.
f1_lstm_ae, predicted_anomalies_lstm_ae = calculate_f1_score(
    anomaly_indices=anomalies_lstm_ae,
    true_anomalies=true_anomalies,
)
print(f"F1 Score: {f1_lstm_ae}")

F1 Score: 0.5542516699765301


In [None]:
# Per-class precision / recall / F1 for the LSTM-AE predictions.
print(classification_report(true_anomalies, predicted_anomalies_lstm_ae))

              precision    recall  f1-score   support

           0       0.95      0.95      0.95     25785
           1       0.54      0.57      0.55      2694

    accuracy                           0.91     28479
   macro avg       0.75      0.76      0.75     28479
weighted avg       0.92      0.91      0.91     28479



In [None]:
# Confusion matrix for the LSTM-AE: rows are true classes, columns are
# predicted classes (scikit-learn convention).
print(confusion_matrix(true_anomalies, predicted_anomalies_lstm_ae))

[[24475  1310]
 [ 1159  1535]]


## LSTM Variational AutoEncoder

In [None]:
# Flag test windows whose LSTM-VAE loss (reconstruction + KL) is above the
# 90th percentile.
anomalies_lstm_vae = evaluate_model(
    model=model_lstm_vae,
    variational=True,
    test_loader=test_loader,
    device=device,
    loss_fn=loss_function_lstm_vae,
    percentile_threshold=90,
)

  batch = torch.tensor(batch, dtype=torch.float32).to(device)


In [None]:
# F1 of the LSTM-VAE predictions against the ground-truth labels.
f1_lstm_vae, predicted_anomalies_lstm_vae = calculate_f1_score(
    anomaly_indices=anomalies_lstm_vae,
    true_anomalies=true_anomalies,
)
print(f"F1 Score: {f1_lstm_vae}")

F1 Score: 0.6141902870554252


In [None]:
# Per-class precision / recall / F1 for the LSTM-VAE predictions.
print(classification_report(true_anomalies, predicted_anomalies_lstm_vae))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96     25785
           1       0.60      0.63      0.61      2694

    accuracy                           0.92     28479
   macro avg       0.78      0.79      0.79     28479
weighted avg       0.93      0.92      0.93     28479



In [None]:
# Confusion matrix for the LSTM-VAE: rows are true classes, columns are
# predicted classes (scikit-learn convention).
print(confusion_matrix(true_anomalies, predicted_anomalies_lstm_vae))

[[24641  1144]
 [  993  1701]]
