In [1]:
import torch

In [2]:
from pkg.datasets import IDSDataset
from pkg.lit_ae import LitIDSAE

In [3]:
from torchvision import transforms, utils
from tqdm import tqdm
import pickle
import pandas as pd
# Маппинги айпи адресов
with open('Data/sip_map.pkl', 'rb') as f:
    sip_map = pickle.load(f)
with open('Data/dip_map.pkl', 'rb') as f:
    dip_map = pickle.load(f)

In [4]:
PATH = 'Models/autoencoder-final-epoch=26-step=68302.ckpt'
model = LitIDSAE.load_from_checkpoint(PATH, sip_map=sip_map, dip_map=dip_map, map_location=torch.device('cpu'), use_category=False)

In [5]:
from torchvision import transforms, utils
from tqdm import tqdm, tqdm_notebook
import pickle
import pandas as pd

ds_test = IDSDataset('Data/all_test.csv', sip_map, dip_map, only_anomaly=False, transform=True,
                    stat=pd.read_csv('Data/all_stat.csv').rename({0:'count', 1:'mean', 2:'std', 3:'min', 4:'25%', 5:'50%', 6:'75%', 7:'max'}))
test_dataloader = torch.utils.data.DataLoader(ds_test, batch_size=3**5,
                        shuffle=False, num_workers=4)

In [6]:
device = torch.device('cpu')
model = model.to(device)

In [7]:
losses = []
mses =[]
labels = []
with torch.no_grad():
    model.eval()
    for row in tqdm(test_dataloader):
        categorical = row[0].to(device)
        continuous =  row[1].to(device).unsqueeze(-2)
        labels.append(row[2])
        x = model.categorical_emb(categorical, continuous) 
        x_hat, mu = model(x)
        # dist = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim = -1)
        mse = torch.sum(torch.nn.functional.mse_loss(x_hat, continuous, reduction='none'), dim=-1)
        mses.append(mse.squeeze())
        # dist = torch.mean(dist, dim=-1)
        # losses.append(dist.squeeze())

100%|██████████| 871/871 [03:34<00:00,  4.07it/s]


In [8]:
losses[0].shape

IndexError: list index out of range

In [None]:
torch.cat(losses)

In [None]:
new_dict = {'dist': torch.cat(losses).cpu().numpy(), 'mse':torch.cat(mses).cpu().numpy()}
labels_cat = torch.cat(labels) != 0
# losses_cat = torch.cat(val_losses).cpu()
test_out = pd.DataFrame({ 'is_benign':labels_cat, 'attack_class': torch.cat(labels).numpy(), **new_dict})
test_out.to_csv('test_results_ae.csv')

In [None]:
import numpy as np
new_dict['dist'][np.isnan(new_dict['dist'])] = 0
new_dict['dist'][np.isinf(new_dict['dist'])] = 1e+6

In [None]:
from sklearn.metrics import auc, roc_curve
import matplotlib.pyplot as plt
fpr, tpr, thresholds = roc_curve(labels_cat.int().numpy(), new_dict['mse'])
roc_auc = auc(fpr, tpr)
fig, ax = plt.subplots()
ax.plot(fpr, tpr, 
             label='ROC curve (area = {0:0.2f})'
             ''.format(roc_auc))
ax.plot([0, 1], [0, 1], color='navy', linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend(loc="lower right")

In [None]:
thresholds.shape

In [None]:
IDX = -10
threshold = thresholds[IDX]
test_out['predicted'] = test_out.dist < threshold

In [None]:
from sklearn.metrics import classification_report, f1_score
y_gts = test_out['is_benign']
y_preds = test_out['predicted']
rep = classification_report(y_gts, y_preds, target_names=['Attack', 'No attack'])
print(rep)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=56316f98-13a2-452c-be31-8997c1e2f7a7' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>