In [1]:
from tqdm import tqdm
from typing import List, Dict
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from flowprintOptimal.sekigo.core.flowRepresentation import FlowRepresentation,PacketFlowRepressentation
from flowprintOptimal.sekigo.dataAnalysis.vNATDataFrameProcessor import VNATDataFrameProcessor
from flowprintOptimal.sekigo.core.flowConfig import FlowConfig
import random
from flowprintOptimal.sekigo.flowUtils.flowDatasets import PacketFlowDataset
from torch.utils.data import Dataset,DataLoader
from torchsampler import ImbalancedDatasetSampler
from sklearn.model_selection import train_test_split
from flowprintOptimal.sekigo.flowUtils.commons import normalizePacketRep
import os
from joblib import Parallel, delayed
from flowprintOptimal.sekigo.flowUtils.commons import saveFlows,loadFlows
from flowprintOptimal.sekigo.dataAnalysis.dataFrameProcessor import UTMobileNetProcessor
from flowprintOptimal.sekigo.flowUtils.dataGetter import getTrainTestOOD
from sklearn.metrics import confusion_matrix
import json
from flowprintOptimal.sekigo.modeling.trainers import NNClassificationTrainer
from flowprintOptimal.sekigo.modeling.neuralNetworks import LSTMNetwork,TransformerGenerator,CNNNetwork1D
from flowprintOptimal.sekigo.modeling.loggers import Logger
import torch
# Pick the compute device once: CUDA when torch reports it as available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# To force CPU execution regardless of availability, use: device = torch.device("cpu")
from flowprintOptimal.sekigo.earlyClassification.DQL.core import MemoryElement,Rewarder,State
from flowprintOptimal.sekigo.earlyClassification.DQL.memoryFiller import MemoryFiller
from flowprintOptimal.sekigo.earlyClassification.DQL.datasets import MemoryDataset
from flowprintOptimal.sekigo.earlyClassification.DQL.trainers import EarlyClassificationtrainer
from flowprintOptimal.sekigo.utils.documentor import Documenter
from flowprintOptimal.sekigo.utils.evaluations import Evaluator,EarlyEvaluation
from flowprintOptimal.sekigo.flowUtils.commons import getTimeStampsFromIAT, getIATFromTimeStamps
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter (no category/module restriction), so
# deprecation and numerical warnings from any library are hidden as well.
warnings.filterwarnings('ignore')

In [11]:
# Scratch calculation: scale `lam` by the fraction i/l for every i from 5 through l (inclusive).
l = 15
lam = .15
arr = [lam*(i/l) for i in range(5, l+1)]

In [12]:
# Display the total of the scaled terms collected in `arr`.
sum(arr)

1.0999999999999999

In [2]:
# Experiment configuration, grouped by the component that consumes each section.
configs = {
    "name": "UTMobileNet2021_no_sample_ood_alpha",
    "description": "UTMobileNet2021 with OOD detection (no balancers used) no ood samples generated alpha of .5",

    # Shared by several components (used as the packet limit when loading flows).
    "common_config": {
        "max_length": 15,
    },

    "full_model_kwargs": {
        "lstm_hidden_size": 256,
        "layers": 2,
        "lstm_input_size": 3,
    },

    "early_model_kwargs": {
        "lstm_input_size": 3,
        "lstm_hidden_size": 256,
        "layers": 2,
    },

    "data_config": {
        "dataset_name": "UTMobileNet2021",
        "subsampleConfig": None,  # alternative: dict(max_gap = 20, min_gap = 5)
        "max_flow_length": 80,  # in seconds (each flow sample cannot exceed this length)
        "test_size": .2,
        "ood_classes": ["google-maps", "youtube", "messenger"],
        "do_balance": False,
    },

    "rewarder_config": {
        "l": .1,
    },

    "dataset_config": {
        "aug": [0, .2],
    },

    "memory_fillter_config": {
        "ood_config": None,  # alternative: dict(ood_aug = [.6,.9], ood_prob = .2)
        "min_length": 5,
        "use_balancer": False,
    },

    "full_trainer_config": {
        "use_sampler": False,
    },

    "early_trainer_config": {
        "use_sampler": False,  # this is for giving more weight to wait samples
    },
}

In [3]:
# Load the flows and split them into train / test / out-of-distribution sets.
# The packet limit comes from the shared max_length setting; everything else
# is forwarded verbatim from the data_config section.
train_flows, test_flows, ood_flows = getTrainTestOOD(
    packet_limit=configs["common_config"]["max_length"],
    **configs["data_config"],
)

full class distrubation
google-maps     3256
netflix         2081
reddit          1609
facebook        1312
youtube         1309
pinterest       1233
instagram       1222
spotify          878
google-drive     877
twitter          847
gmail            511
hangout          426
messenger        411
Name: count, dtype: int64
using no sampling
filtering max_flow_length = 80
post num packet filter class distrubation
google-maps     3256
netflix         2081
reddit          1609
facebook        1312
youtube         1309
pinterest       1233
instagram       1222
spotify          878
google-drive     877
twitter          847
gmail            511
hangout          426
messenger        409
Name: count, dtype: int64
------------------------------
train class distrubation
netflix         1681
reddit          1290
facebook        1025
pinterest        990
instagram        973
google-drive     711
spotify          704
twitter          682
gmail            412
hangout          328
Name: count, dtype: i

In [4]:
# Wrap the flow splits in PacketFlowDataset objects.
# The training set is created with label_to_index=None and the configured
# augmentation range; the test set reuses the training set's label mapping so
# class indices agree between the two.
train_dataset = PacketFlowDataset(
    flows=train_flows,
    label_to_index=None,
    aug=configs["dataset_config"]["aug"],
)
test_dataset = PacketFlowDataset(
    flows=test_flows,
    label_to_index=train_dataset.label_to_index,
)

# Only build an OOD dataset when OOD flows exist. Use an identity check against
# None (`is not None`) rather than `!=`, which can be overridden or behave
# elementwise for array-like containers.
if ood_flows is not None and len(ood_flows) != 0:
    ood_dataset = PacketFlowDataset(flows=ood_flows, label_to_index=None)
else:
    ood_dataset = None

In [5]:
# Take the first training flow's inter-arrival times, convert them to
# timestamps, then convert those timestamps back to inter-arrival times.
# NOTE(review): presumably a round-trip sanity check of the two helpers, but
# processed_IAT is never compared against original_IAT in this cell — confirm.
original_IAT = train_flows[0].inter_arrival_times
timestamps = getTimeStampsFromIAT(original_IAT)
processed_IAT = getIATFromTimeStamps(timestamps= timestamps)

In [8]:
# Display the "data" entry of the first training sample.
train_dataset[0]["data"]

array([[0.05066667, 0.        , 0.        ],
       [0.05066667, 0.66044052, 1.        ],
       [0.04533333, 0.35332852, 0.        ],
       [0.194     , 0.47408577, 0.        ],
       [0.04533333, 0.65617666, 1.        ],
       [0.87066667, 0.56168224, 1.        ],
       [0.04533333, 0.29616328, 0.        ],
       [0.87066667, 0.46921004, 1.        ],
       [0.04533333, 0.27765287, 0.        ],
       [0.85866667, 0.23180299, 1.        ],
       [0.04533333, 0.26721528, 0.        ],
       [0.10733333, 0.51535638, 0.        ],
       [0.40266667, 0.66347016, 0.        ],
       [0.21933333, 0.42096119, 1.        ],
       [0.07533333, 0.34416099, 1.        ]])

In [9]:
# Draw 2 distinct values from 0..9 (replace=False means sampling without replacement).
# NOTE(review): no random seed is set in the visible cells, so this output
# differs between runs.
np.random.choice(a= np.arange(10), size= 2, replace= False)

array([4, 2])

In [69]:
# Keep a handle on the first training flow and display its `lengths` attribute.
flow_rep = train_flows[0]
flow_rep.lengths

[0.5913333333333334,
 0.06866666666666667,
 0.04533333333333334,
 0.8706666666666667,
 0.04533333333333334,
 0.75,
 0.04533333333333334,
 0.8706666666666667,
 0.574,
 0.04533333333333334,
 0.06866666666666667,
 0.04533333333333334,
 0.3466666666666667,
 0.06866666666666667,
 0.04533333333333334]

In [73]:
# Display the length reported by the flow representation.
len(flow_rep)

15

In [91]:
# Apply dropPacketAug to the selected flow with required_length=10 and max_drop_rate=.9.
# NOTE(review): dropPacketAug is neither imported nor defined in the visible
# cells — confirm it is defined before this cell, otherwise a fresh
# Restart & Run All will raise NameError here.
aug_rep = dropPacketAug(flow_rep= flow_rep, required_length= 10, max_drop_rate= .9)

0
