In [1]:
import numpy as np
import torch
import torch.nn as nn
from flowprintOptimal.sekigo.flowUtils.conversions import convertPacketRepToTimeslotRepEffecient
from flowprintOptimal.sekigo.modeling.neuralNetworks import LSTMDuelingNetwork
from flowprintOptimal.sekigo.core.flowRepresentation import PacketFlowRepressentation,TimeslotRepresentation
from flowprintOptimal.sekigo.flowUtils.commons import getActivityArrayFromTimeslotRep, loadFlows
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from flowprintOptimal.sekigo.utils.documentor import Documenter
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import precision_recall_fscore_support
from flowprintOptimal.sekigo.flowUtils.packetDropping import getPacketDroppedPacketDataset

In [2]:
np.log2(1/13)

-3.700439718141092

In [3]:
packet_documenter = Documenter.load("vnat_ood")
timeslot_documenter = Documenter.load("vnat_timeslot_ood")

In [4]:
timeslot_documenter.train_dataset

<flowprintOptimal.sekigo.flowUtils.flowDatasets.DDQNActivityDataset at 0x75be207c2050>

In [5]:
packet_documenter.train_dataset[0]["data"].shape

(15, 3)

In [6]:
pd.Series(map(lambda x : len(x["data"]), timeslot_documenter.train_dataset)).describe()

count    2073.000000
mean       13.051134
std         2.567297
min         8.000000
25%        11.000000
50%        15.000000
75%        15.000000
max        15.000000
dtype: float64

In [7]:
timeslot_documenter.test_dataset.flows[0]

<flowprintOptimal.sekigo.core.flowRepresentation.TimeslotRepresentation at 0x75be20508220>

In [8]:
class CombinedEval:
    def __init__(self,packet_documenter : Documenter, timeslot_documenter : Documenter,device, threshold = .25):
        self.packet_documenter = packet_documenter
        self.timeslot_documenter = timeslot_documenter
        self.threshold = threshold

        self.packet_documenter.early_model.to(device)
        self.timeslot_documenter.early_model.to(device)
        self.grain = timeslot_documenter.train_dataset.flow_config.grain
        self.band_threshold = timeslot_documenter.train_dataset.flow_config.band_thresholds
        self.device = device


        self.rearrange_indices = np.array(CombinedEval.getRearrangingIndices(packet_documenter.train_dataset.label_to_index,timeslot_documenter.train_dataset.label_to_index))

    @staticmethod
    def getRearrangingIndices(src_dct,dst_dct):
        assert len(src_dct) == len(dst_dct) 
        re_arrange = [None]*len(src_dct)

        for key,index in src_dct.items():
            re_arrange[index] = dst_dct[key]
        
        return re_arrange + [len(src_dct)]
    

    def __processSinglePrediction(self,prediction,num_classes):
        """
        predictions are of shape (seq_len)
        """
        # min_steps - 1 as if the min steps is 5 then after proccessing the 5th timestep index will be 4 !!!!
        for time in range(self.min_steps -1,len(prediction)):
            if prediction[time] < num_classes:
                return (prediction[time],time + 1)
        
        return (-1,len(prediction))
    
    

    def getConfidence(self,output):
        """
        output is if shape (TS,num_classes)

        returns maximal class and its score
        """
        def softmax(x):
            # x is of dim (TS,num_classes)
            e_x = np.exp(x)
            return e_x / e_x.sum(axis=1, keepdims = True)
        confs = softmax(output[:,:-1])
        return np.argmax(confs,axis = 1), np.max(confs, axis= 1)


    def proccessPacketSlots(self,packet_out,timeslot_out,packet_nums):
        def makeRangesFromPacketNums(packet_nums):
            ranges = []
            for packet_num in packet_nums:
                if len(ranges) == 0:
                    ranges.append((0,packet_num - 1))
                else:
                    start = ranges[-1][1] + 1
                    ranges.append((start, start + packet_num - 1))
            return ranges
        
        def processRange(packet_range_out, timeslot_index):
            p_val = -1
            p_conf = 0
            ts_val, ts_conf = self.getConfidence(output= timeslot_out[timeslot_index:timeslot_index+1])
            ts_val = self.rearrange_indices[ts_val]
            packet_preds = np.argmax(packet_range_out, axis= 1)
            max_p_conf,max_p_val = 0,-1
            packets_used = 0

            for i in range(len(packet_range_out)):
                packets_used += 1
                if packet_preds[i] != len(packet_range_out[0]) - 1:
                    p_val,p_conf = self.getConfidence(output= packet_range_out[i:i+1])
                    if max_p_conf < p_conf:
                        max_p_conf = p_conf
                        max_p_val = p_val

                    assert p_val == packet_preds[i]
                    
                    if p_val != -1:
                        # packet prediction is made
                        if p_conf > self.threshold:
                            return p_val.item(),packets_used



            if max_p_val != -1:
                if max_p_val == ts_val:
                    #print("ts used")
                    return max_p_val.item(),packet_nums[timeslot_index]
            
            return -1,packet_nums[timeslot_index]
                
        ranges = makeRangesFromPacketNums(packet_nums= packet_nums)


        output_prediction = -1
        packets_taken = 0
        for i,p_range in enumerate(ranges):
            prediction,r_packet_taken = processRange(packet_out[p_range[0]:p_range[1]+1],i)
            packets_taken += r_packet_taken
            if prediction != -1:
                output_prediction = prediction
                break
            

        return output_prediction,packets_taken




    def infer(self,packet_flow_rep : PacketFlowRepressentation):
        #class_type = self.packet_documenter.train_dataset.label_to_index[packet_flow_rep.class_type]
        timeslot_rep : TimeslotRepresentation = convertPacketRepToTimeslotRepEffecient(packet_flow_rep= packet_flow_rep,grain= self.grain, band_thresholds=self.band_threshold)


        if len(timeslot_rep) > 15:
            timeslot_rep = timeslot_rep.getSubFlow(start_index= 0, length= 15)

        packet_nums = (timeslot_rep.down_packets + timeslot_rep.up_packets).sum(axis = 0) # this is a list with the number of packets
        total_packets = packet_nums.sum()
       
        
        if len(packet_flow_rep) > total_packets:
            packet_flow_rep = packet_flow_rep.getSubFlow(start_index= 0, length= total_packets)

      

        packet_flow_input = np.array([packet_flow_rep.lengths,packet_flow_rep.inter_arrival_times,packet_flow_rep.directions]).T
        packet_flow_input = torch.tensor(packet_flow_input).unsqueeze(0).to(self.device).float()
        timeslot_flow_input = getActivityArrayFromTimeslotRep(timeslot_rep)
        timeslot_flow_input = torch.tensor(timeslot_flow_input).unsqueeze(0).to(self.device).float()

        with torch.no_grad():
            packet_out,_ = self.packet_documenter.early_model.earlyClassificationForward(packet_flow_input)
            timeslot_out,_ = self.timeslot_documenter.early_model.earlyClassificationForward(timeslot_flow_input)
    
        packet_out = packet_out[0].cpu().numpy()
        timeslot_out = timeslot_out[0].cpu().numpy()

        prediction,packets_taken = self.proccessPacketSlots(packet_out= packet_out, timeslot_out= timeslot_out, packet_nums= packet_nums)
        return prediction,packets_taken
    


In [9]:
combined_eval = CombinedEval(packet_documenter,timeslot_documenter,device=device, threshold= .35)

In [10]:
len(packet_documenter.test_dataset.flows[0])

57

In [11]:
packet_dropped_dataset = getPacketDroppedPacketDataset(packet_dataset= packet_documenter.test_dataset, max_drop_rate= .05)

In [12]:
predicted = []
true = []
packets_taken = []
for i,flow_rep in tqdm(enumerate(packet_documenter.test_dataset.flows)):
    p,pt = combined_eval.infer(flow_rep)
    predicted.append(p)
    packets_taken.append(pt)
    true.append(packet_documenter.test_dataset[i]["label"])

519it [00:03, 161.36it/s]


In [19]:
ood_predictions = []
for i,flow_rep in tqdm(enumerate(packet_documenter.ood_dataset.flows)):
    p,pt = combined_eval.infer(flow_rep)
    ood_predictions.append(p)


459it [00:01, 306.77it/s]


In [38]:
predicted = []
true = []
packets_taken = []
for i,flow_rep in tqdm(enumerate(packet_dropped_dataset.flows)):
    p,pt = combined_eval.infer(flow_rep)
    predicted.append(p)
    packets_taken.append(pt)
    true.append(packet_documenter.test_dataset[i]["label"])

10023it [00:33, 296.73it/s]


In [13]:
predicted = np.array(predicted)
packets_taken = np.array(packets_taken)
true = np.array(true)
included = predicted != -1
precision_recall_fscore_support(y_true= true[included], y_pred= predicted[included], average= "macro")

(0.9923913043478261, 0.9962151728768043, 0.9942848457631964, None)

In [14]:
included.sum()/len(predicted)

1.0

In [64]:
(true[included] == predicted[included]).sum()/included.sum()

0.9791983764586504

In [15]:
packets_taken.mean()

3.8921001926782273

In [20]:
(np.array(ood_predictions) == -1).sum()/len(ood_predictions)

0.006535947712418301

In [22]:
ood_predictions

[2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 -1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,