In [1]:
import torch

from pkg.datasets import IDSDataset
from pkg.lit_vae import LitIDSVAE
from pkg.lit_sad import LitIDSVAE_SAD

In [2]:
from torchvision import transforms, utils
from tqdm import tqdm
import pickle
import pandas as pd
# IP address mappings (sip_map / dip_map), unpickled from local files.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
def _load_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


sip_map = _load_pickle('Data/sip_map.pkl')
dip_map = _load_pickle('Data/dip_map.pkl')

In [3]:
# Build the model that is trained further down; both IP maps are passed through.
# NOTE(review): the meaning of the positional 128 and 512 is fixed by
# LitIDSVAE_SAD's signature, which is not visible here -- confirm before editing.
autoencoder = LitIDSVAE_SAD(sip_map, dip_map, 128, 512, use_category=False)

In [4]:
# Checkpoint of the base VAE; its state dict is handed to `autoencoder` below.
PATH = 'Models/vae-final-epoch=26-step=68421.ckpt'
base_enc = LitIDSVAE.load_from_checkpoint(
    PATH,
    sip_map=sip_map,
    dip_map=dip_map,
    map_location=torch.device('cpu'),  # map the checkpoint's tensors onto the CPU
)

In [5]:
# Hand the checkpointed model's full state dict to the new model's init_encoder.
# NOTE(review): which entries are actually copied is decided inside
# LitIDSVAE_SAD.init_encoder, which is not visible here.
autoencoder.init_encoder(base_enc.state_dict())

In [6]:
# Benign-only training set. The statistics passed as `stat` are read from the
# whole-dataset stats file (min/max etc.), not recomputed from this subset.
# The CSV's integer row labels 0..7 are renamed to the summary-statistic names.
benign_stat = pd.read_csv('Data/all_stat.csv').rename(
    index=dict(enumerate(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']))
)
ds_train = IDSDataset(
    'Data/all_train.csv',
    sip_map,
    dip_map,
    only_benign=True,
    transform=True,
    stat=benign_stat,
)
# Test split is currently disabled:
#ds_test = IDSDataset('Data/all_test.csv', sip_map, dip_map, only_benign=True, transform=True)

In [7]:
from torch.utils.data import DataLoader
BATCH_SIZE = 3 ** 5  # 243 samples per batch

# Shuffled loader over the current `ds_train`, served by four worker processes.
train_loader = DataLoader(
    ds_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

In [8]:
# Keep the model on the CPU, then call init_center_c with the loader built above
# (at this point in the notebook that loader iterates the benign-only dataset).
cpu_device = torch.device('cpu')
autoencoder = autoencoder.to(cpu_device)
autoencoder.init_center_c(train_loader)

In [10]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
# TensorBoard logger rooted at 'exps' under the experiment name 'vae-sad'.
# No explicit version is passed (a `version=1` argument was left disabled).
logger = TensorBoardLogger(save_dir='exps', name='vae-sad')

# No GPUs are requested (gpus=None); the trainer reports to the logger above.
trainer = pl.Trainer(gpus=None, logger=logger)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [11]:
# Rebuild ds_train / train_loader with only_benign=False. This rebinds the same
# names that earlier cells used for the benign-only dataset and loader, so cells
# run after this one see the full training data.
BATCH_SIZE = 3 ** 5  # 243 samples per batch

# The CSV's integer row labels 0..7 are renamed to the summary-statistic names.
full_stat = pd.read_csv('Data/all_stat.csv').rename(
    index=dict(enumerate(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']))
)
ds_train = IDSDataset(
    'Data/all_train.csv',
    sip_map,
    dip_map,
    only_benign=False,
    transform=True,
    stat=full_stat,
)
train_loader = DataLoader(
    ds_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

In [12]:
# Train the model on the loader defined in the previous cell
# (the one built with only_benign=False).
trainer.fit(autoencoder, train_loader)


  | Name           | Type       | Params
----------------------------------------------
0 | continuous_emb | Linear     | 8.7 K 
1 | encoder        | Sequential | 231 K 
2 | layer_mean     | Linear     | 16.4 K
3 | layer_logvar   | Linear     | 16.4 K
----------------------------------------------
272 K     Trainable params
0         Non-trainable params
272 K     Total params
1.091     Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…



In [13]:
# Pull a single batch from the training loader for the manual inspection below.
# The loader shuffles, so a different batch is drawn each time this cell runs.
batch = next(iter(train_loader))

In [14]:
# Unpack the sampled batch by position: categorical features, continuous
# features (with a singleton axis inserted before the last one) and labels
# (with a trailing singleton axis).
categorical = batch[0]
continuous = batch[1].unsqueeze(-2)
labels = batch[2].unsqueeze(-1)

# Embed the inputs and run the model to get the posterior parameters.
x = autoencoder.categorical_emb(categorical, continuous)
mu_p, logvar_p = autoencoder(x)

# Distance of each sample's posterior from the model's centre (m_c, v_c),
# summed over the last axis. If v_c and logvar_p are log-variances, this is the
# closed-form KL divergence between diagonal Gaussians, KL(posterior || centre).
center_mean = autoencoder.m_c
center_logvar = autoencoder.v_c
inv_center_var = 1 / center_logvar.exp()
delta = mu_p - center_mean
kl_terms = (
    center_logvar - logvar_p - 1
    + delta * inv_center_var * delta
    + inv_center_var * logvar_p.exp()
)
dist = 0.5 * torch.sum(kl_terms, dim=-1)

# Disabled alternatives kept for reference: the same expression with the roles
# of the posterior and the centre swapped, and a label-dependent loss built on
# top of `dist`.
# dist = 0.5 * (logvar_p - autoencoder.v_c  - 1 + (autoencoder.m_c - mu_p) * \
#             (1 / logvar_p.exp()) * (autoencoder.m_c - mu_p) + \
#             (1 / logvar_p.exp()) * autoencoder.v_c.exp())
# losses = torch.where(labels == 0, dist, autoencoder.eta * ((dist + autoencoder.eps) ** -1))

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=56316f98-13a2-452c-be31-8997c1e2f7a7' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>