This notebook contains the code for evaluating the anomaly detector, as described in Section 4.3.

In [1]:
import gzip
import torch
import pandas as pd
from collections import defaultdict
import uuid
import numpy as np
from tqdm import tqdm
from torch.nn.functional import softmax, cross_entropy
import networkx as nx
import pickle


# Load data

This data consists of the events that were flagged as suspicious, split into train/val/test sets as described in Section 4.2.
The split itself is performed in 'Make sequence dataset.ipynb'.

In [2]:
# Choose the dataset to evaluate by name; the name is used to build the data paths.
ds_name = "Theia" # one of: Cadets, Theia

In [3]:
# Per-dataset folder; the trailing slash is kept because file names are appended to it directly.
data_folder = f"dataset/{ds_name.lower()}/"
events_csv_path = data_folder + f"{ds_name}_Krystal_transformation.csv"
events_df = pd.read_csv(events_csv_path)

# Upper bound on the number of per-event predictions scored for each sequence.
seq_len = 10

In [4]:
# Load the validation split (one pickle for the model inputs, one for the graph data).
set_name = "val" # one of: train, val, test

fname_data_for_model = f"{ds_name}_{set_name}_data_for_model.pkl"
fname_data_for_graph = f"{ds_name}_{set_name}_data_for_graph.pkl"

# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
# The context managers guarantee the file handles are closed after loading
# (a bare pickle.load(open(...)) leaves them open).
with open(data_folder + fname_data_for_model, "rb") as model_file:
    val_data_for_model = pickle.load(model_file)
with open(data_folder + fname_data_for_graph, "rb") as graph_file:
    val_data_for_graph = pickle.load(graph_file)

In [5]:
# Load the test split (one pickle for the model inputs, one for the graph data).
set_name = "test" # one of: train, val, test

fname_data_for_model = f"{ds_name}_{set_name}_data_for_model.pkl"
fname_data_for_graph = f"{ds_name}_{set_name}_data_for_graph.pkl"

# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
# The context managers guarantee the file handles are closed after loading
# (a bare pickle.load(open(...)) leaves them open).
with open(data_folder + fname_data_for_model, "rb") as model_file:
    test_data_for_model = pickle.load(model_file)
with open(data_folder + fname_data_for_graph, "rb") as graph_file:
    test_data_for_graph = pickle.load(graph_file)

In [6]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
# Report how many sequences each loaded split contains.
num_val_sequences = len(val_data_for_model['data_for_sequences'])
print(f"{num_val_sequences} sequences are validation sequences")
num_test_sequences = len(test_data_for_model['data_for_sequences'])
print(f"Next {num_test_sequences} are test sequences")

104 sequences are validation sequences
Next 400 are test sequences


# Load anomaly detection model

In [8]:
import pandas as pd
import torch
from torch.utils.data import Dataset, Sampler
import numpy as np
from torch.utils.data import DataLoader

In [9]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from anomaly_detector_architecture import ad_model

In [10]:
# Hyper-parameters of the anomaly detector and its input layout.
options = {
    'input_size': 41,        # original note: len(train_logs[0][0])
    'hidden_size': 128,
    'num_layers': 2,
    'num_classes': 6,
    'batch_size': 64,
    'p_dropout': 0.3,
    'bidirectional': False,
    'seq_len': 10,
}

In [11]:
# Build the detector from the architecture-related entries of `options`;
# 'batch_size' and 'seq_len' are not passed to the constructor.
model_kwargs = {
    name: options[name]
    for name in ('input_size', 'hidden_size', 'num_layers',
                 'num_classes', 'p_dropout', 'bidirectional')
}
model = ad_model(**model_kwargs)
model

ad_model(
  (lstm): LSTM(41, 128, num_layers=2, batch_first=True, dropout=0.3)
  (fc): Linear(in_features=128, out_features=6, bias=True)
)

In [12]:
# fill model with the trained weights
# Fill the model with the trained weights.
model_path = "anomaly_detection_model.pth"
# map_location remaps the stored tensors onto the selected device, so a checkpoint
# saved during a GPU run can still be loaded when only the CPU is available.
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)

# Find anomaly threshold from validation set

In [13]:
# Switch the model to evaluation mode (turns off dropout) before scoring sequences.
model.eval()

ad_model(
  (lstm): LSTM(41, 128, num_layers=2, batch_first=True, dropout=0.3)
  (fc): Linear(in_features=128, out_features=6, bias=True)
)

In [14]:
# Score every validation sequence. Each step whose true next event type is not among the
# model's top-`candidates` predictions adds 0.2 * (1 - probability assigned to the true type).
anomaly_score_ls = []
candidates = 1

with torch.no_grad():

    for seq in tqdm(val_data_for_model['data_for_sequences']):
        logs_input = seq['logs_for_sequence']
        next_event_types = seq['next_event_types']

        # The extra list level adds a leading dimension of size 1 to the model input.
        logs_input = torch.tensor([logs_input], dtype=torch.long).to(device)
        next_event_types = torch.tensor(next_event_types, dtype=torch.long).to(device)
        output = model(logs_input)

        soft_out = softmax(output, dim=-1).squeeze()
        # Class indices ordered from most to least likely for every step.
        sorted_preds = torch.argsort(output.squeeze(), dim=-1, descending=True)
        anomaly_score = 0
        for ind_event, pred, correct_event_type in zip(range(seq_len), sorted_preds, next_event_types):
            top_k_preds = pred[:candidates]
            if correct_event_type not in top_k_preds:
                anomaly_score += 0.2 * (1-soft_out[ind_event, correct_event_type])
        # Store a plain Python float: the score is a tensor (possibly on the GPU) when any
        # step was mispredicted and the int 0 otherwise, and NumPy cannot aggregate CUDA
        # tensors. This matches the float() conversion used in the test-set evaluation.
        anomaly_score_ls.append(float(anomaly_score))

100%|███████████████████████████████████████████████████████████████████████████████| 104/104 [00:00<00:00, 601.45it/s]


In [15]:
# Mean and maximum of the collected anomaly scores.
np.average(anomaly_score_ls), np.max(anomaly_score_ls)

(0.2519275, 0.44127148)

# Evaluations - for the selected dataset

In [16]:
model.eval()

# Confusion-matrix counters for the test set.
TP = FP = TN = FN = 0

malicious_seq = []  # indices of the sequences labelled malicious

# Indices of the flagged sequences, split by ground-truth label.
TP_ls, FP_ls = [], []

# NOTE(review): fixed value; it is not computed from the validation scores in this cell.
threshold_anomaly_score = 0.5
candidates = 1

with torch.no_grad():
    for seq in tqdm(test_data_for_model['data_for_sequences']):
        seq_ind = seq['seq_ind']
        malicious_label = seq['label']

        logs_input = torch.tensor([seq['logs_for_sequence']], dtype=torch.long).to(device)
        next_event_types = torch.tensor(seq['next_event_types'], dtype=torch.long).to(device)

        output = model(logs_input)
        soft_out = softmax(output, dim=-1).squeeze()
        # Class indices ordered from most to least likely for every step.
        sorted_preds = torch.argsort(output.squeeze(), dim=-1, descending=True)

        # Each step whose true event type is outside the top-`candidates` predictions
        # contributes 0.2 * (1 - probability assigned to the true type).
        anomaly_score = 0
        for ind_event, pred, correct_event_type in zip(range(seq_len), sorted_preds, next_event_types):
            if correct_event_type in pred[:candidates]:
                continue
            anomaly_score += 0.2 * (1 - soft_out[ind_event, correct_event_type])

        anomaly_score = round(float(anomaly_score), 3)
        # A sequence is flagged unless its score is strictly below the threshold.
        flagged = not (anomaly_score < threshold_anomaly_score)

        if malicious_label == 1:  # malicious
            malicious_seq.append(seq_ind)
            if flagged:
                TP_ls.append(seq_ind)
                TP += 1
            else:
                FN += 1
        elif malicious_label == 0:  # benign
            if flagged:
                FP_ls.append(seq_ind)
                FP += 1
            else:
                TN += 1
        # Any other label value is left uncounted.


100%|███████████████████████████████████████████████████████████████████████████████| 400/400 [00:00<00:00, 699.34it/s]


In [17]:
# Derive the summary metrics from the confusion-matrix counters.
# Every ratio falls back to 0 when its denominator is empty, so a test set without
# benign sequences (FP + TN == 0) no longer raises ZeroDivisionError.
FPR = FP / (FP + TN) if FP + TN > 0 else 0
P = TP / (TP + FP) if TP + FP > 0 else 0
R = TP / (TP + FN) if TP + FN > 0 else 0
print('true positive: {}, \nfalse positive (FP): {}, \ntrue negative: {}, \nfalse negative (FN): {}, \nPrecision: {:.3f}, \nRecall: {:.3f}, \nFalse positive rate: {:.3f}'
    .format(TP, FP, TN, FN, P, R, FPR))

true positive: 25, 
false positive (FP): 37, 
true negative: 337, 
false negative (FN): 1, 
Precision: 0.403, 
Recall: 0.962, 
False positive rate: 0.099
