## Results of the models

In this notebook we will compare the results of the notebooks of the different models to wee which is better

In [27]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.functional import cosine_similarity
import pandas as pd
import numpy as np

In [28]:
logs_df = pd.read_csv('../data/sitges_access_prepared_last.csv')

In [29]:
# Dataset to work without sequences
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe.values.astype(np.float32) 

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        return torch.tensor(sample)


# Custom Dataset class to create sequences on-the-fly
class SequenceDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length + 1

    def __getitem__(self, idx):
        sequence = self.data[idx:idx + self.seq_length].to_numpy()
        return torch.tensor(sequence, dtype=torch.float32)

In [30]:

test_dataset_normalAutoencoder = CustomDataset(logs_df)
test_loader_normalAutoencoder = DataLoader(test_dataset_normalAutoencoder, batch_size=1, shuffle=False)

test_dataset_LSTMAutoencoder = SequenceDataset(logs_df, 20)
test_loader_LSTMAutoencoder = DataLoader(test_dataset_LSTMAutoencoder, batch_size=1, shuffle=False)

In [31]:
class LogAnomalyDetector(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(LogAnomalyDetector, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size[0]),
            nn.ReLU(),
            nn.Linear(hidden_size[0], hidden_size[1]),
            nn.ReLU(),
            nn.Linear(hidden_size[1], hidden_size[2])
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_size[2], hidden_size[1]),
            nn.ReLU(),
            nn.Linear(hidden_size[1], hidden_size[0]),
            nn.ReLU(),
            nn.Linear(hidden_size[0], input_size)
        )

    def forward(self, x):
        emb = self.encoder(x)
        #print("Latent space:", emb)
        x = self.decoder(emb)
        return x

In [32]:
class LSTMAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(LSTMAutoencoder, self).__init__()
        self.encoder = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.decoder = nn.LSTM(hidden_dim, input_dim, num_layers, batch_first=True)
    
    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        # Encoder
        _, (hidden, _) = self.encoder(x)
        # Repeat the last hidden state seq_len times
        hidden = hidden[-1].repeat(seq_len, 1, 1).transpose(0, 1)
        # Decoder
        decoded, _ = self.decoder(hidden)
        return decoded

In [33]:
input_size=len(logs_df.columns)
hidden_size=[50, 20, 5] 
num_layers=1
normalAutoencoder = LogAnomalyDetector(input_size=input_size, hidden_size=hidden_size)

input_dim = len(logs_df.columns)  # Number of features in your log data
hidden_dim = input_dim//4
num_layers = 3
LSTMAutoencoder = LSTMAutoencoder(input_dim, hidden_dim, num_layers)

In [34]:
ROOT_DIR = os.path.dirname(os.path.abspath(""))
normalAutoencoder.load_state_dict(torch.load(os.path.join(ROOT_DIR, "models/normalAutoencoder.pt"), map_location=torch.device('cpu')))
LSTMAutoencoder.load_state_dict(torch.load(os.path.join(ROOT_DIR, "models/LSTMAutoencoder.pt"), map_location=torch.device('cpu')))

<All keys matched successfully>

In [35]:
def compute_normality_score(individual_mse, max_possible_mse):
    normalized_mse = individual_mse / max_possible_mse
    normality_score = 1 - normalized_mse  # Inversion (1 normal, 0 anomaly)
    return normality_score

In [36]:
def select_possible_anomalies(model, test_loader, criterion, threshold, max_mse):

    model.eval() 

    test_loss = 0.0
    possible_anomalies=[] 
    with torch.no_grad():
        for i, input in enumerate(test_loader):
            #inputs = inputs.to(device)
            output = model(input)
            loss = criterion(output.squeeze(), input.squeeze())
            score=compute_normality_score(loss, max_mse)

            if score <= threshold: possible_anomalies.append((i, score))


    return possible_anomalies

In [37]:
criterion = nn.MSELoss()
average_mse=0.04 # Obtained from the training fase
max_mse=average_mse*4
threshold=0.3
possible_anomalies_normalAutoencoder=select_possible_anomalies(normalAutoencoder, test_loader_normalAutoencoder, criterion, threshold, max_mse)

In [38]:
print(len(possible_anomalies_normalAutoencoder))

691


In [39]:
criterion = nn.MSELoss()
average_mse=0.11 # Obtained from the training fase
max_mse=average_mse*4
threshold=0.4
possible_anomalies_LSTMAutoencoder=select_possible_anomalies(LSTMAutoencoder, test_loader_LSTMAutoencoder, criterion, threshold, max_mse)

In [40]:
print(len(possible_anomalies_LSTMAutoencoder))

19


In [41]:
real_logs_df = pd.read_csv('../data/sitges_access.20240129.csv')


In [67]:
def vec2log(anomalies, dataframe, seq):
    logs=[]
    if seq:
        for i, anomaly in enumerate(anomalies):
            sequence = dataframe.iloc[anomaly[0]:anomaly[0]+20].copy()
            # Add a 'Sequence' column with the sequence number
            sequence['sequence'] = i
            score = round(anomaly[1].item(), 2)
            sequence['score'] = str(score)
            logs.append(sequence)
    else:
        for anomaly in anomalies:
            log = dataframe.iloc[anomaly[0]:anomaly[0]+20].copy()
            # Add a 'Sequence' column with the sequence number
            score = round(anomaly[1].item(), 2)
            log['score'] = str(score)
            logs.append(log)

    if logs:  # Ensure there are logs to concatenate
        df_logs = pd.concat(logs, ignore_index=True)
    else:
        df_logs = pd.DataFrame(columns=dataframe.columns)  # Handle case with no anomalies

    return df_logs

In [68]:
logs_normalAutoencoder=vec2log(possible_anomalies_normalAutoencoder, real_logs_df, False)
logs_normalAutoencoder

Unnamed: 0.1,Unnamed: 0,server_name,IP,logname,authenticate,date,petition,URL,status,bytes,referer,user-agent,score
0,173,www.sitgesanytime.com,172.56.180.71,-,-,2024-01-29 00:02:13+01:00,GET,/plantilles/turisme/img/temps3/31.svg HTTP/2.0,200,4834,https://www.sitgesanytime.com/en/pl409/blog/id...,Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) ...,0.28
1,174,www.sitgesanytime.com,172.56.180.71,-,-,2024-01-29 00:02:13+01:00,GET,/plantilles/turisme/img/logo-peu.png HTTP/2.0,200,5578,https://www.sitgesanytime.com/en/pl409/blog/id...,Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) ...,0.28
2,175,www.sitgesanytime.com,172.56.180.71,-,-,2024-01-29 00:02:13+01:00,GET,/media/site1/cache/images/sabatichotelsitges3-...,200,198690,https://www.sitgesanytime.com/en/pl409/blog/id...,Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) ...,0.28
3,176,www.sitgesanytime.com,172.56.180.71,-,-,2024-01-29 00:02:13+01:00,GET,/plantilles/turisme/css/estils-mw500.css?v=14 ...,200,1283,https://www.sitgesanytime.com/en/pl409/blog/id...,Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) ...,0.28
4,177,www.sitgesanytime.com,172.56.180.71,-,-,2024-01-29 00:02:13+01:00,GET,/plantilles/turisme/css/calendari.css HTTP/2.0,200,1242,https://www.sitgesanytime.com/en/pl409/blog/id...,Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) ...,0.28
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13815,58369,www.sitgesanytime.com,195.154.126.93,-,-,2024-01-29 11:17:39+01:00,GET,/comu/fancybox/helpers/jquery.fancybox-buttons...,200,1134,https://www.sitgesanytime.com/de/pl64/erleben/...,Mozilla/5.0 (compatible; AhrefsBot/7.0; +http:...,0.25
13816,58370,www.sitgesanytime.com,195.154.126.93,-,-,2024-01-29 11:17:39+01:00,GET,/plantilles/turisme/js/responsivemenu.js?v=3 H...,200,1913,https://www.sitgesanytime.com/de/pl64/erleben/...,Mozilla/5.0 (compatible; AhrefsBot/7.0; +http:...,0.25
13817,58371,www.sitgesanytime.com,195.154.126.93,-,-,2024-01-29 11:17:39+01:00,GET,/comu/js/lib.js?v=3 HTTP/2.0,200,11214,https://www.sitgesanytime.com/de/pl64/erleben/...,Mozilla/5.0 (compatible; AhrefsBot/7.0; +http:...,0.25
13818,58372,www.sitgesanytime.com,195.154.126.93,-,-,2024-01-29 11:17:39+01:00,GET,/comu/js/jquery.hoverIntent.minified.js HTTP/2.0,200,1001,https://www.sitgesanytime.com/de/pl64/erleben/...,Mozilla/5.0 (compatible; AhrefsBot/7.0; +http:...,0.25


In [69]:
logs_LSTMAutoencoder=vec2log(possible_anomalies_LSTMAutoencoder, real_logs_df, True)
logs_LSTMAutoencoder

Unnamed: 0.1,Unnamed: 0,server_name,IP,logname,authenticate,date,petition,URL,status,bytes,referer,user-agent,sequence,score
0,16578,www.sitgesanytime.com,216.244.66.199,-,-,2024-01-29 04:01:28+01:00,GET,/de/pl30/planen-sie-ihre-reise/sitges-auf-eine...,200,50197,-,Mozilla/5.0 (compatible; DotBot/1.2; +https://...,0,0.39
1,16579,www.sitgesanytime.com,60.251.99.238,-,-,2024-01-29 04:01:29+01:00,GET,/en/pl330/news/calendar/id758/programa-oficial...,404,2423,-,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,0,0.39
2,16580,www.sitgesanytime.com,60.251.99.238,-,-,2024-01-29 04:01:29+01:00,GET,/plantilles/turisme/css/estils-screen.css HTTP...,200,5766,https://www.sitgesanytime.com/en/pl330/news/ca...,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,0,0.39
3,16581,www.sitgesanytime.com,60.251.99.238,-,-,2024-01-29 04:01:29+01:00,GET,/plantilles/turisme/img/logo-top.svg HTTP/2.0,200,190609,https://www.sitgesanytime.com/en/pl330/news/ca...,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,0,0.39
4,16582,www.sitgesanytime.com,60.251.99.238,-,-,2024-01-29 04:01:29+01:00,GET,/plantilles/turisme/css/estils-base.css HTTP/2.0,200,23991,https://www.sitgesanytime.com/en/pl330/news/ca...,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,0,0.39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,23066,www.sitgesanytime.com,168.119.68.247,-,-,2024-01-29 05:16:15+01:00,GET,/comu/js/jquery.hoverIntent.minified.js HTTP/2.0,200,1041,https://www.sitgesanytime.com/es/planifica-el-...,Mozilla/5.0 (compatible; AhrefsBot/7.0; +http:...,18,0.4
376,23067,www.sitgesanytime.com,168.119.68.247,-,-,2024-01-29 05:16:15+01:00,GET,/comu/js/jquery.form.min.js HTTP/2.0,200,5919,https://www.sitgesanytime.com/es/planifica-el-...,Mozilla/5.0 (compatible; AhrefsBot/7.0; +http:...,18,0.4
377,23068,www.sitgesanytime.com,168.119.68.247,-,-,2024-01-29 05:16:15+01:00,GET,/plantilles/turisme/css/estils-base.css?v=23 H...,200,23991,https://www.sitgesanytime.com/es/planifica-el-...,Mozilla/5.0 (compatible; AhrefsBot/7.0; +http:...,18,0.4
378,23069,www.sitgesanytime.com,168.119.68.247,-,-,2024-01-29 05:16:15+01:00,GET,/plantilles/turisme/css/font/Brushways.woff HT...,200,160018,https://www.sitgesanytime.com/es/planifica-el-...,Mozilla/5.0 (compatible; AhrefsBot/7.0; +http:...,18,0.4
