In [1]:
from tqdm import tqdm
from typing import List, Dict
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from flowprintOptimal.sekigo.core.flowRepresentation import FlowRepresentation,PacketFlowRepressentation
from flowprintOptimal.sekigo.dataAnalysis.vNATDataFrameProcessor import VNATDataFrameProcessor
from flowprintOptimal.sekigo.core.flowConfig import FlowConfig
import random
from flowprintOptimal.sekigo.flowUtils.flowDatasets import PacketFlowDataset
from torch.utils.data import Dataset,DataLoader
from torchsampler import ImbalancedDatasetSampler
from sklearn.model_selection import train_test_split
from flowprintOptimal.sekigo.flowUtils.commons import normalizePacketRep
import os
from joblib import Parallel, delayed
from flowprintOptimal.sekigo.flowUtils.commons import saveFlows,loadFlows
from flowprintOptimal.sekigo.dataAnalysis.dataFrameProcessor import UTMobileNetProcessor
from flowprintOptimal.sekigo.flowUtils.dataGetter import getTrainTestOOD
from sklearn.metrics import confusion_matrix
import json
import torch
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
# Use the GPU when torch reports one as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
from flowprintOptimal.sekigo.modeling.neuralNetworks import LSTMNetwork,TransformerGenerator,CNNNetwork1D
#device = torch.device("cpu")
from flowprintOptimal.sekigo.utils.evaluations import Evaluator,EarlyEvaluation
from flowprintOptimal.sekigo.utils.documentor import Documenter

In [21]:
# Load the Documenter for the named run. Later cells read its `configs`,
# `early_model`, `full_model` and `test_dataset` attributes.
# NOTE(review): presumably this restores a previously saved experiment — confirm
# where Documenter.load looks for it.
documenter = Documenter.load(name= "UTMobileNet2021_no_sample_ood_standard")

In [22]:
# Display the description string stored in this run's configs.
documenter.configs["description"]

'UTMobileNet2021 with OOD detection (no balancers used) no ood samples generated'

In [23]:
# Display the data configuration stored in this run's configs.
documenter.configs["data_config"]

{'dataset_name': 'UTMobileNet2021',
 'subsampleConfig': None,
 'max_flow_length': 80,
 'test_size': 0.2,
 'ood_classes': ['google-maps', 'youtube', 'messenger'],
 'do_balance': False}

In [15]:
# Build one evaluator per model held by the loaded run.
# `evaluator` wraps the early model and takes its minimum step count from the
# run's stored config; `all_evaluator` wraps the full model.
evaluator = EarlyEvaluation(
    model=documenter.early_model,
    device=device,
    min_steps=documenter.configs["memory_fillter_config"]["min_length"],
)
all_evaluator = Evaluator(
    device=device,
    model=documenter.full_model,
)

In [16]:
# Evaluate the early model (wrapped by `evaluator`) on the run's test dataset and
# display the resulting metrics.
evaluator.getMetrices(dataset= documenter.test_dataset)

{'micro_f1': 0.8888541014402004,
 'macro_f1': 0.8173771432150136,
 'accuracy': 0.8888541014402004,
 'cm': array([[379,   1,   1,   3,   0,   1,   4,   6,   2,   0,   0,   0,   0],
        [  0, 154,   0,   2,   0,   2,   4,   3,   0,   0,   0,   1,   0],
        [  0,   0, 204,   4,   0,   1,  16,   1,   0,   0,   0,   1,   0],
        [  0,   6,   3, 620,   1,   1,   7,   3,  22,   0,   0,   0,   0],
        [  0,   1,   0,   2, 206,   1,   3,  13,   1,   1,   0,   0,   0],
        [  0,   3,   0,   3,   0, 100,   1,   0,   0,   0,   0,   1,   0],
        [  1,   1,   9,   1,   0,   0, 259,   1,   0,   0,   0,   0,   8],
        [  0,   1,   2,   0,  20,   1,   4, 269,   1,   2,   0,   2,   0],
        [  1,   0,   0,  37,   0,   4,   0,   1, 234,   0,   0,   1,   0],
        [  0,   0,   0,   0,   1,   0,   1,  12,   0, 153,   4,   5,   0],
        [  0,   0,   0,   2,   0,   0,   1,   0,   1,   0,  67,   2,   0],
        [  0,   0,   1,   0,   1,   0,   2,   2,   0,   3,   2, 152,  

In [17]:
# Evaluate the full model (wrapped by `all_evaluator`) on the same test dataset
# and display the resulting metrics.
all_evaluator.getMetrices(dataset= documenter.test_dataset)

{'micro_f1': 0.9283030682529744,
 'macro_f1': 0.911474829395677,
 'accuracy': 0.9283030682529744,
 'cm': array([[381,   2,   4,   2,   1,   0,   1,   8,   1,   0,   0,   1,   0],
        [  1, 159,   3,   1,   1,   0,   1,   0,   0,   0,   0,   1,   1],
        [  1,   1, 212,   2,   0,   0,   9,   2,   0,   0,   0,   0,   1],
        [  4,   3,   7, 642,   3,   0,   5,   2,   5,   0,   0,   1,   0],
        [  2,   0,   2,   1, 193,   2,   4,  32,   0,   1,   0,   1,   1],
        [  0,   2,   1,   0,   2, 104,   1,   0,   0,   0,   0,   1,   0],
        [  0,   1,   4,   0,   0,   0, 273,   1,   1,   0,   0,   1,   3],
        [  0,   1,   2,   1,   0,   0,   6, 301,   0,   1,   0,   2,   0],
        [  0,   1,   0,   7,   1,   0,   0,   2, 266,   0,   0,   1,   0],
        [  0,   1,   1,   0,   0,   0,   0,   6,   0, 164,   3,   4,   0],
        [  2,   1,   0,   1,   0,   0,   0,   0,   0,   0,  71,   0,   0],
        [  0,   2,   1,   0,   0,   0,   0,   3,   1,   2,   1, 154,   

In [18]:
# Collect the run's scores. Later cells read the keys "train_dataset_counts",
# "test_dataset_counts", "full_metrices", "early_metrices" and "labels" from it.
info = documenter.getScores(device= device)

In [19]:
# Per-class flow counts for each split, converted to plain dicts so they can be
# indexed by class name.
train_counts = dict(info["train_dataset_counts"])
test_counts = dict(info["test_dataset_counts"])

In [20]:
# Display the per-class flow counts of the training split.
train_counts

{'google-maps': 2584,
 'netflix': 1680,
 'reddit': 1295,
 'youtube': 1031,
 'facebook': 1028,
 'instagram': 994,
 'pinterest': 994,
 'google-drive': 709,
 'spotify': 699,
 'twitter': 682,
 'gmail': 400,
 'hangout': 351,
 'messenger': 329}

In [10]:
# Total flows per class = train count + test count.
# Start from a copy so `train_counts` is not mutated (the cell stays re-runnable),
# then fold the test counts in. Using .get() means a class that is missing from
# one split neither raises KeyError nor gets dropped from the totals.
total_flows = train_counts.copy()

for key, count in test_counts.items():
    total_flows[key] = total_flows.get(key, 0) + count

In [11]:
# Display the combined train + test flow count per class.
# NOTE(review): the saved output of this cell lists class names that do not
# appear in the saved `train_counts` output, and the execution counters are out
# of order — this looks like stale output from another run. Re-run from a fresh
# kernel (Restart & Run All) before relying on it.
total_flows

{'BROWSERS': 18820, 'P2P': 14175, 'MAIL': 4432, 'OTHER': 2368, 'Skype': 499}

In [13]:
# Display the confusion matrix stored under the "full_metrices" scores.
# NOTE(review): presumably these are the full model's metrics — confirm. The
# saved output is 4x4 while the data config lists many more classes; it may be
# stale output from a different run.
info["full_metrices"]["cm"]

array([[2033,  122,    0,    4],
       [  67, 2129,    0,    5],
       [   0,    2, 1086,    0],
       [   2,    0,    0, 1789]])

In [11]:
# Display the confusion matrix stored under the "early_metrices" scores.
# NOTE(review): presumably these are the early model's metrics — confirm.
info["early_metrices"]["cm"]

array([[2006,  104,    4,   28],
       [ 157, 2035,    0,    6],
       [   1,    2, 1081,    4],
       [   2,    8,    0, 1781]])

In [12]:
# Display the class labels returned with the scores.
# NOTE(review): presumably this is the row/column order of the confusion
# matrices above — confirm against Documenter.getScores.
info["labels"]

['streaming', 'FT', 'chat', 'control']