In [1]:
import numpy as np
import torch
import torch.nn as nn
from flowprintOptimal.sekigo.modeling.neuralNetworks import BaseLSTMNetwork
import random
from tqdm import tqdm
from typing import List, Dict
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from flowprintOptimal.sekigo.core.flowRepresentation import FlowRepresentation,PacketFlowRepressentation
from flowprintOptimal.sekigo.dataAnalysis.vNATDataFrameProcessor import VNATDataFrameProcessor
from flowprintOptimal.sekigo.core.flowConfig import FlowConfig
import random
from flowprintOptimal.sekigo.flowUtils.flowDatasets import PacketFlowDataset, BaseFlowDataset
from torch.utils.data import Dataset,DataLoader
from torchsampler import ImbalancedDatasetSampler
from sklearn.model_selection import train_test_split
from flowprintOptimal.sekigo.flowUtils.commons import normalizePacketRep
import os
from joblib import Parallel, delayed
from flowprintOptimal.sekigo.flowUtils.commons import saveFlows,loadFlows
from flowprintOptimal.sekigo.dataAnalysis.dataFrameProcessor import UTMobileNetProcessor
from flowprintOptimal.sekigo.flowUtils.dataGetter import getTrainTestOOD
from sklearn.metrics import confusion_matrix
import json
from flowprintOptimal.sekigo.modeling.trainers import NNClassificationTrainer
from flowprintOptimal.sekigo.modeling.neuralNetworks import LSTMNetwork,TransformerGenerator,CNNNetwork1D, LSTMDuelingNetwork
from flowprintOptimal.sekigo.modeling.loggers import Logger
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
from flowprintOptimal.sekigo.earlyClassification.DQL.core import MemoryElement,Rewarder,State
from flowprintOptimal.sekigo.earlyClassification.DQL.memoryFiller import MemoryFiller
from flowprintOptimal.sekigo.earlyClassification.DQL.datasets import MemoryDataset
from flowprintOptimal.sekigo.earlyClassification.DQL.trainers import EarlyClassificationtrainer
from flowprintOptimal.sekigo.utils.documentor import Documenter
from flowprintOptimal.sekigo.utils.evaluations import Evaluator,EarlyEvaluation
import warnings
from copy import deepcopy
import torch.nn as nn
from torch.distributions.categorical import Categorical
from flowprintOptimal.sekigo.flowUtils.commons import dropPacketFromPacketRep
from typing import List
warnings.filterwarnings('ignore')

In [63]:
# Experiment configuration. Every section is a plain dict so it can be splatted
# into the matching constructor / helper with ** further down the notebook.
configs = {
    "name": "VNAT_no_sample_ood_dueling",
    "description": "Sample UNIBS dueling with ood detection and ood samples generated, reward function gives out 0 for ood",

    # Settings shared by several components.
    "common_config": {
        "max_length": 15,
    },

    "full_model_kwargs": {
        "lstm_hidden_size": 256,
        "layers": 2,
        "lstm_input_size": 3,
    },

    "early_model_kwargs": {
        "lstm_input_size": 3,
        "lstm_hidden_size": 256,
        "layers": 2,
    },

    "data_config": {
        "dataset_name": "VNAT",
        "subsampleConfig": None,  # alternative: dict(max_gap = 30, min_gap = 10)
        "max_flow_length": 80,  # in seconds (each flow sample cannot exceed this length)
        "test_size": .2,
        "ood_classes": [],
        "do_balance": False,
    },

    "rewarder_config": {
        "l": .5,
    },

    "dataset_config": {
        "aug": [0, .2],
    },

    # Key spelling ("fillter") is kept exactly as in the original configuration.
    "memory_fillter_config": {
        "ood_config": {"ood_aug": [.6, .9], "ood_prob": .2},
        "min_length": 5,
        "use_balancer": False,
    },

    "full_trainer_config": {
        "use_sampler": False,
    },

    "early_trainer_config": {
        "use_sampler": False,  # this is for giving more weight to wait samples
    },

    "ppo_configs": {
        "num_envs": 512,
        "num_steps": 32,
        "observation_dim": [3],
        "action_space_dim": None,  # filled in once the number of labels is known
        "gamma": .99,
        "num_mini_batches": 8,
        "update_epochs": 2,
        "clip_coef": .2,
        "entropy_coef": .05,
        "value_coef": .5,
        "max_grad_norm": 1,
    },
}

In [47]:
# Load the flows and split them into train / test / out-of-distribution sets.
# The per-flow packet limit is taken from the shared max_length setting.
train_flows, test_flows, ood_flows = getTrainTestOOD(
    packet_limit=configs["common_config"]["max_length"],
    **configs["data_config"],
)

full class distrubation
FT           16420
control      13591
streaming     1759
chat          1244
Name: count, dtype: int64
using no sampling
filtering max_flow_length = 80
post num packet filter class distrubation
streaming    1541
chat          639
control       609
FT            456
Name: count, dtype: int64
------------------------------
train class distrubation
streaming    1218
chat          522
control       492
FT            364
Name: count, dtype: int64
test class distrubation
streaming    323
chat         117
control      117
FT            92
Name: count, dtype: int64


In [64]:
# Wrap the flow splits in datasets. The test split reuses the label -> index
# mapping exposed by the training dataset so both share the same class ids.
train_dataset = PacketFlowDataset(flows=train_flows, label_to_index=None, aug=configs["dataset_config"]["aug"])
test_dataset = PacketFlowDataset(flows=test_flows, label_to_index=train_dataset.label_to_index)

# Only build an OOD dataset when OOD flows were actually returned.
# `is not None` is used instead of `!= None` (identity check for the singleton).
has_ood_flows = ood_flows is not None and len(ood_flows) != 0
ood_dataset = PacketFlowDataset(flows=ood_flows, label_to_index=None) if has_ood_flows else None

# Derive the label-dependent settings from the data.
num_labels = len(train_dataset.label_to_index)
configs["full_model_kwargs"]["output_dim"] = num_labels
# The early model / PPO policy gets one extra output: action index
# `num_labels` is the "wait" action handled by EarlyEnvironment.step.
configs["early_model_kwargs"]["output_dim"] = num_labels + 1
configs["rewarder_config"]["num_labels"] = num_labels
configs["rewarder_config"]["max_length"] = configs["common_config"]["max_length"]
configs["ppo_configs"]["action_space_dim"] = num_labels + 1

In [65]:
class EarlyEnvironment:
    """Episode-based environment for early classification of a time series.

    Each episode walks through one randomly chosen sample of ``dataset``.
    At every timestep the agent either predicts a class (actions
    ``0 .. num_classes - 1``), which ends the episode, or waits (action
    ``num_classes``), which reveals the next timestep. Episodes restart
    automatically, so ``step`` can be called indefinitely after one ``reset``.

    ``dataset[i]`` must be a mapping with a ``"data"`` entry (indexable by
    timestep, with a length) and a ``"label"`` entry.
    """

    def __init__(self,dataset,num_classes, reward_lam = .5):
        """
        dataset     : indexable collection of samples (see class docstring)
        num_classes : number of real classes; action ``num_classes`` means "wait"
        reward_lam  : total waiting penalty spread over a full-length episode
        """
        self.dataset = dataset
        self.num_classes = num_classes
        self.reward_lam = reward_lam
        # Position inside the current episode; populated by reset().
        self.curr_iterator = None
        self.num_episodes = 0
        # Running means over all finished episodes.
        self.info = dict(
            reward = 0,
            length = 0
        )

    def returnObs(self):
        """Return the observation at the current (sample index, timestep)."""
        return self.dataset[self.curr_iterator["index"]]["data"][self.curr_iterator["ts"]]

    def reset(self):
        """Start a new episode on a uniformly sampled item and return its first observation."""
        index = np.random.randint(0, len(self.dataset))
        self.curr_iterator = dict(index = index, ts = 0, reward = 0, length = 0)
        return self.returnObs()

    def step(self, action):
        """Apply ``action`` and advance the environment.

        Rewards: +1 for the correct class, -1 for a wrong class, and
        ``-reward_lam / episode_length`` for waiting (0 when the label is -1,
        presumably the out-of-distribution marker). The episode also ends when
        the last timestep has been consumed.

        Returns a dict with:
            next_obs : observation the agent must act on NEXT, i.e. the following
                       timestep, or the first timestep of a fresh episode when
                       ``done`` is 1
            done     : 1 if this step ended the episode, else 0
            reward   : reward for this step
            label    : label of the sample the action was taken on
            ts       : timestep index the action was taken at
        """
        assert action >= 0 and action <= self.num_classes
        label = self.dataset[self.curr_iterator["index"]]["label"]
        total_ts_length = len(self.dataset[self.curr_iterator["index"]]["data"])
        acted_ts = self.curr_iterator["ts"]
        # Running out of timesteps terminates the episode regardless of the action.
        done = acted_ts == total_ts_length - 1
        reward = 0
        if action == label:
            reward = 1
            done = True
        elif action == self.num_classes:
            if label == -1:
                reward = 0
            else:
                reward = -self.reward_lam/total_ts_length
        else:
            reward  = -1
            done = True

        self.curr_iterator["ts"] += 1
        self.curr_iterator["reward"] += reward
        self.curr_iterator["length"] += 1
        if done:
            self.num_episodes += 1

            # Incremental update of the running means over finished episodes.
            self.info["reward"] = (self.info["reward"]*(self.num_episodes - 1) +  self.curr_iterator["reward"])/self.num_episodes
            self.info["length"] = (self.info["length"]*(self.num_episodes - 1) +  self.curr_iterator["length"])/self.num_episodes
            # Auto-reset: the next observation belongs to the new episode.
            next_obs = self.reset()
        else:
            # The observation must be read AFTER advancing ts; reading it before
            # would hand the agent the observation it has already acted on.
            next_obs = self.returnObs()

        return dict(next_obs = next_obs, done = int(done), reward  = reward,label = label, ts = acted_ts)



class EarlyEnvs:
    """Thin batching wrapper that drives several EarlyEnvironment objects in lock-step."""

    def __init__(self, early_environments : List[EarlyEnvironment]):
        self.early_environments = early_environments

    def returnObs(self):
        """Stack the current observation of every environment into one array."""
        return np.array([env.returnObs() for env in self.early_environments])

    def reset(self):
        """Reset every environment, then return the stacked first observations."""
        for env in self.early_environments:
            env.reset()
        return self.returnObs()

    def step(self,actions):
        """Step environment i with actions[i] and collate the per-environment results.

        Returns a dict with the same keys as a single environment's step result,
        where each value is an array holding one entry per environment.
        """
        assert len(actions) == len(self.early_environments)

        per_env_results = [
            env.step(action=action)
            for env, action in zip(self.early_environments, actions)
        ]

        # Turn the list of dicts into a dict of arrays, keyed like the first result.
        return {
            key: np.array([result[key] for result in per_env_results])
            for key in list(per_env_results[0].keys())
        }

    def getInfo(self):
        """Average the per-environment running reward / length statistics."""
        num_envs = len(self.early_environments)
        total_reward = sum(env.info["reward"] for env in self.early_environments)
        total_length = sum(env.info["length"] for env in self.early_environments)

        return dict(
            reward = total_reward/num_envs,
            length = total_length/num_envs
        )


In [66]:
class PPOAgent(BaseLSTMNetwork):
    """Recurrent actor-critic: a shared LSTM trunk feeding a policy head and a value head."""

    def __init__(self, lstm_input_size, lstm_hidden_size, output_dim, layers=1) -> None:
        super().__init__(lstm_input_size, lstm_hidden_size,layers)

        # Re-initialise the LSTM: zero biases, orthogonal weight matrices.
        for param_name, param in self.lstm.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(param, 0)
            elif "weight" in param_name:
                nn.init.orthogonal_(param, 1.0)

        head_width = lstm_hidden_size//2

        # Policy head: LSTM features -> one logit per action.
        self.action_linear = nn.Sequential(
            nn.Linear(lstm_hidden_size, head_width), nn.Tanh(),
            nn.Linear(head_width, head_width), nn.Tanh(),
            nn.Linear(head_width, output_dim),
        )
        # Value head: LSTM features -> scalar state value.
        self.value_linear = nn.Sequential(
            nn.Linear(lstm_hidden_size, head_width), nn.Tanh(),
            nn.Linear(head_width, head_width), nn.Tanh(),
            nn.Linear(head_width, 1),
        )

    def getStates(self,x,lstm_state,done):
        """Run the LSTM one timestep at a time, clearing state where an episode restarted.

        x          : observations, reshaped internally to (seq, BS, lstm_input_size)
        lstm_state : (h, c) tuple; BS (number of environments) is read from h.shape[1]
        done       : flags reshaped internally to (seq, BS); a 1 zeroes that
                     environment's state before the corresponding step

        During rollouts seq is 1. Returns the per-step LSTM outputs flattened to
        (seq * BS, hidden) together with the final LSTM state.
        """
        num_envs = lstm_state[0].shape[1]

        # Put the batch axis first before slicing out timesteps
        # (assumes self.lstm is batch_first - TODO confirm in BaseLSTMNetwork).
        obs_seq = x.reshape(-1,num_envs,self.lstm.input_size).permute((1,0,2))
        done_seq = done.reshape(-1,num_envs).permute((1,0))

        per_step_hidden = []
        for t in range(obs_seq.shape[1]):
            # 0 where the episode restarted at this step, 1 otherwise.
            keep_mask = (1.0 - done_seq[:,t]).view(1, -1, 1)
            masked_state = (keep_mask * lstm_state[0], keep_mask * lstm_state[1])

            lstm_hidden,lstm_state = self.lstm(obs_seq[:,t:t+1,:], masked_state)

            # Back to (1, BS, hidden) so concatenation yields (seq, BS, hidden).
            per_step_hidden.append(lstm_hidden.permute((1,0,2)))

        flat_hidden = torch.flatten(torch.cat(per_step_hidden),0,1)
        return flat_hidden,lstm_state


    def getValue(self, x, lstm_state, done):
        """Return the value-head output for every (step, environment) row."""
        features, _ = self.getStates(x, lstm_state, done)
        return self.value_linear(features)

    def getActionAndValue(self, x, lstm_state, done, action=None):
        """Evaluate the policy and value heads on the LSTM features.

        When ``action`` is None a fresh action is sampled from the categorical
        policy; otherwise the supplied actions are scored.

        Returns (action, log_prob, entropy, value, lstm_state).
        """
        features, lstm_state = self.getStates(x, lstm_state, done)
        policy = Categorical(logits=self.action_linear(features))

        if action is None:
            action = policy.sample()

        log_prob = policy.log_prob(action)
        entropy = policy.entropy()
        value = self.value_linear(features)
        return action, log_prob, entropy, value, lstm_state

    def earlyClassificationForward(self,X):
        """Gradient-free pass over whole sequences.

        X has shape (BS, seq_len, lstm_input_size). Returns the policy-head
        output for every timestep together with the raw LSTM outputs.
        """
        with torch.no_grad():
            lstm_out, _ = self.lstm(X)
            action_logits = self.action_linear(lstm_out)
            return action_logits,lstm_out
    
    

In [67]:
class PPOTrainer:
    """Recurrent PPO training loop for the early-classification agent.

    One update = reset all environments, roll out ``num_steps`` steps in every
    environment, compute discounted returns / advantages, then run
    ``update_epochs`` passes of clipped-PPO mini-batch updates. Mini-batches are
    formed over whole environments so each sequence can be replayed through the
    LSTM from a zero state.
    """

    def __init__(self,envs : EarlyEnvs,agent : PPOAgent,ppo_config : dict, device,logger : Logger, train_dataset,test_dataset, ood_dataset = None):
        """
        envs          : vectorised environments; must contain ppo_config["num_envs"] entries
        agent         : actor-critic network, moved to ``device`` here
        ppo_config    : dict with num_envs, num_steps, observation_dim, action_space_dim,
                        gamma, num_mini_batches, update_epochs, clip_coef,
                        entropy_coef, value_coef, max_grad_norm
        device        : torch device used for the agent, the storage and the rollouts
        logger        : metric logger
        train_dataset : dataset evaluated by evalTrain
        test_dataset  : dataset evaluated by evalTest (drives best-model tracking)
        ood_dataset   : optional dataset evaluated by evalOOD
        """
        self.envs = envs
        self.ppo_config = ppo_config
        self.agent = agent.to(device)
        self.device = device
        assert len(self.envs.early_environments) == ppo_config["num_envs"]
        self.storage = self._initializeStorage()
        self.logger = logger
        self.evaluator = EarlyEvaluation(min_steps= 0, device= device,model= self.agent)

        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.ood_dataset = ood_dataset

        # Best test macro-F1 seen so far and a deep copy of the agent that achieved it.
        self.best = dict(
            score = 0,
            model = None
        )

        # Evaluation metrics are reported every time they are logged.
        self.logger.setMetricReportSteps(metric_name= "test_eval_f1", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "train_eval_f1", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "train_eval_time", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "test_eval_time", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "ood_eval", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "ood_eval_time", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "incorrect_ood_test", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "incorrect_ood_train", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "avg_reward", step_size= 1)
        self.logger.setMetricReportSteps(metric_name= "avg_length", step_size= 1)

    def _initializeStorage(self):
        """Allocate the rollout buffers, each shaped (num_steps, num_envs, ...)."""
        config = self.ppo_config
        num_steps,num_envs,observation_dim, action_space_dim = config["num_steps"], config["num_envs"], config["observation_dim"], config["action_space_dim"]

        obs = torch.zeros([num_steps, num_envs] + observation_dim).to(self.device)
        # -1 marks "no action recorded yet".
        actions  = torch.ones([num_steps,num_envs]).to(self.device)*-1
        log_probs = torch.zeros([num_steps,num_envs]).to(self.device)
        rewards = torch.zeros([num_steps,num_envs]).to(self.device)
        dones = torch.zeros([num_steps,num_envs]).to(self.device)
        values = torch.zeros([num_steps,num_envs]).to(self.device)

        return  dict(
            obs = obs, actions = actions, log_probs = log_probs,
            rewards = rewards, dones = dones,values = values
        )


    def _calcAdvantage(self,next_obs,next_lstm_state,next_done):
        """Compute discounted returns and advantages for the stored rollout.

        The return of the final step is bootstrapped from the value of
        ``next_obs``. Returns ``(returns, returns - stored_values)``, both
        shaped (num_steps, num_envs).
        """
        num_steps = self.ppo_config["num_steps"]
        rewards = self.storage["rewards"]
        with torch.no_grad():
            # getValue yields (num_envs, 1); keep only the value column.
            next_value = self.agent.getValue(
                    next_obs,
                    next_lstm_state,
                    next_done,
                )[:,0]

        returns = torch.zeros_like(rewards).to(self.device)
        for t in reversed(range(num_steps)):
            if t == num_steps - 1:
                next_non_terminal = 1 - next_done
                next_return = next_value
            else:
                # dones[t + 1] records whether the step taken at t ended the
                # episode (done flags are stored shifted by one step).
                next_non_terminal = 1 - self.storage["dones"][t+1]
                next_return = returns[t+1]
            returns[t] = rewards[t] + self.ppo_config["gamma"]*next_non_terminal*next_return

        return returns , returns - self.storage["values"]


    def _updateModel(self, agent_optimizer):
        """Run the clipped-PPO optimisation epochs over the stored rollout."""
        num_envs, num_mini_batches = self.ppo_config["num_envs"], self.ppo_config["num_mini_batches"]
        num_steps = self.ppo_config["num_steps"]
        assert num_envs%num_mini_batches == 0

        # Flattening gives [t0 for every env, t1 for every env, ...].
        b_obs = self.storage["obs"].reshape(-1, *self.ppo_config["observation_dim"])
        b_log_probs = self.storage["log_probs"].reshape(-1)
        b_actions = self.storage["actions"].reshape(-1)
        b_dones = self.storage["dones"].reshape(-1)
        b_advantages = self.storage["advantages"].reshape(-1)
        b_returns = self.storage["returns"].reshape(-1)
        b_values = self.storage["values"].reshape(-1)

        envs_per_batch = num_envs//num_mini_batches
        env_indices = np.arange(num_envs) # (0,1,2,3) for nenvs = 4
        flat_indices = np.arange(num_envs*num_steps).reshape(num_steps,num_envs)  # [(0,1,2,3),(4,5,6,7), .....] for nenvs = 4

        for epoch in range(self.ppo_config["update_epochs"]):
            np.random.shuffle(env_indices)

            for start in range(0,num_envs,envs_per_batch):
                end = start + envs_per_batch
                mb_env_indices = env_indices[start : end]
                # Every timestep of the selected environments, in time-major order.
                mb_indices = flat_indices[:,mb_env_indices].ravel()
                # Replay from a zero LSTM state: runUpdate resets the environments
                # and the LSTM state at the start of every rollout.
                _, new_log_prob, entropy, newvalue, _ = self.agent.getActionAndValue(
                    x = b_obs[mb_indices],
                    lstm_state= (
                        torch.zeros(self.agent.lstm.num_layers,envs_per_batch, self.agent.lstm.hidden_size).to(self.device),
                        torch.zeros(self.agent.lstm.num_layers,envs_per_batch, self.agent.lstm.hidden_size).to(self.device)
                    ),
                    done= b_dones[mb_indices],
                    action= b_actions.long()[mb_indices]
                )

                logratio = new_log_prob - b_log_probs[mb_indices]
                ratio = logratio.exp()
                mb_advantages = b_advantages[mb_indices]
                # Per-mini-batch advantage normalisation.
                mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)

                # Policy loss (clipped surrogate objective)
                pg_loss1 = -mb_advantages * ratio
                pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - self.ppo_config["clip_coef"], 1 + self.ppo_config["clip_coef"])
                pg_loss = torch.max(pg_loss1, pg_loss2).mean()

                newvalue = newvalue.view(-1)

                # Value loss (unclipped mean squared error against the returns)
                v_loss = 0.5 * ((newvalue - b_returns[mb_indices]) ** 2).mean()

                entropy_loss = entropy.mean()

                loss = pg_loss - self.ppo_config["entropy_coef"] * entropy_loss + v_loss * self.ppo_config["value_coef"]

                self.logger.addMetric(metric_name= "pg_loss" , value= pg_loss.item())
                self.logger.addMetric(metric_name= "value_loss", value= v_loss.item())
                self.logger.addMetric(metric_name= "entropy_loss", value= entropy_loss.item())
                agent_optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(self.agent.parameters(), self.ppo_config["max_grad_norm"])
                agent_optimizer.step()



    def runUpdate(self, agent_optimizer):
        """Collect one rollout from freshly reset environments, then update the agent."""
        num_steps,num_envs = self.ppo_config["num_steps"], self.ppo_config["num_envs"]
        # Use the trainer's own device throughout (not a notebook-level global),
        # so the rollout tensors always live where the agent and storage live.
        next_obs = torch.tensor(self.envs.reset()).to(self.device).float()
        next_done = torch.zeros(self.ppo_config["num_envs"]).to(self.device)

        next_lstm_state = (
        torch.zeros(self.agent.lstm.num_layers, num_envs, self.agent.lstm.hidden_size).to(self.device),
        torch.zeros(self.agent.lstm.num_layers, num_envs, self.agent.lstm.hidden_size).to(self.device),
        )

        for step in range(num_steps):
            self.storage["obs"][step] = next_obs
            self.storage["dones"][step] = next_done

            with torch.no_grad():
                action,log_prob,_,value,next_lstm_state = self.agent.getActionAndValue(x = next_obs,lstm_state= next_lstm_state,done= next_done)

                # put episode logging code here
                self.storage["actions"][step] = action
                self.storage["log_probs"][step] = log_prob
                self.storage["values"][step] = value.flatten()

                env_step = self.envs.step(action.cpu().numpy())
                next_obs, reward, done = env_step["next_obs"], env_step["reward"], env_step["done"]
                self.storage["rewards"][step] = torch.tensor(reward).to(self.device).float()

                next_obs, next_done = torch.tensor(next_obs).to(self.device).float(), torch.tensor(done).to(self.device)

        returns, advantages = self._calcAdvantage(next_obs= next_obs, next_done= next_done, next_lstm_state= next_lstm_state)
        self.storage["advantages"] = advantages
        self.storage["returns"] = returns

        self._updateModel(agent_optimizer= agent_optimizer)

    def eval(self,dataset : BaseFlowDataset):
        """Evaluate the agent on ``dataset``; returns (macro_f1, time, incorrect_ood)."""
        metrices = self.evaluator.getMetrices(dataset= dataset,ood_dataset= None)
        return metrices["macro_f1"],metrices["time"],metrices["incorrect_ood"]

    def evalOOD(self):
        """Evaluate on the OOD dataset and log its accuracy and time metrics."""
        metrices = self.evaluator.getMetrices(ood_dataset= self.ood_dataset, dataset= None)
        self.logger.addMetric(metric_name= "ood_eval", value= metrices["ood_accuracy"])
        self.logger.addMetric(metric_name= "ood_eval_time", value= metrices["ood_time"])

    def evalTrain(self):
        """Evaluate on the training dataset and log the resulting metrics."""
        f1,average_time,incorrect_ood = self.eval(dataset= self.train_dataset)
        self.logger.addMetric(metric_name= "train_eval_f1", value= f1)
        self.logger.addMetric(metric_name= "train_eval_time", value= average_time)
        self.logger.addMetric(metric_name= "incorrect_ood_train", value = incorrect_ood)

    def evalTest(self):
        """Evaluate on the test dataset, log the metrics and track the best agent."""
        f1,average_time,incorrect_ood = self.eval(dataset= self.test_dataset)

        # Ties count as an improvement, so the most recent equally good agent is kept.
        if f1 >= self.best["score"]:
            self.best["score"] = f1
            self.best["model"] = deepcopy(self.agent)

        self.logger.addMetric(metric_name= "test_eval_f1", value= f1)
        self.logger.addMetric(metric_name= "test_eval_time", value= average_time)
        self.logger.addMetric(metric_name= "incorrect_ood_test", value= incorrect_ood)

    def train(self, num_updates = 1, lr = 3e-4):
        """Run ``num_updates`` rollout + optimisation cycles with a fresh Adam optimiser.

        Test (and, if configured, OOD) evaluation plus the environment reward /
        length statistics are logged every 100 updates; training-set evaluation
        is logged every 2000 updates.
        """
        agent_optimizer = torch.optim.Adam(params= self.agent.parameters(), lr= lr)
        for update in range(num_updates):
            # Fresh buffers for every rollout.
            self.storage = self._initializeStorage()
            self.runUpdate(agent_optimizer = agent_optimizer)
            if update%100 == 0:
                self.evalTest()
                info = self.envs.getInfo()

                self.logger.addMetric(metric_name= "avg_reward", value= info["reward"])
                self.logger.addMetric(metric_name= "avg_length", value= info["length"])
                if self.ood_dataset is not None:
                    self.evalOOD()
            if update%2000 == 0:
                self.evalTrain()

            
            

In [68]:
# One environment per rollout slot; every environment draws its episodes
# from the same training dataset.
early_envs = [
    EarlyEnvironment(dataset=train_dataset, num_classes=len(train_dataset.label_to_index))
    for _ in range(configs["ppo_configs"]["num_envs"])
]

envs = EarlyEnvs(early_environments=early_envs)
agent = PPOAgent(**configs["early_model_kwargs"]).to(device)

In [69]:
# Metric logger; 1500 is the default step size (presumably the reporting
# interval for metrics the trainer does not configure explicitly).
logger = Logger(name="PPO")
logger.default_step_size = 1500

# No OOD dataset is passed, so the trainer skips OOD evaluation.
trainer = PPOTrainer(
    envs=envs,
    agent=agent,
    ppo_config=configs["ppo_configs"],
    device=device,
    logger=logger,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    ood_dataset=None,
)

In [70]:
# Long-running cell: up to 100k PPO updates (the recorded run below was stopped by hand).
trainer.train(num_updates=100_000)

PPO ---- 1 metric test_eval_f1 = 0.1661522633744856
PPO ---- 1 metric test_eval_time = 0.0
PPO ---- 1 metric incorrect_ood_test = 0.0
PPO ---- 1 metric avg_reward = -0.49894022414793104
PPO ---- 1 metric avg_length = 1.2339967131060683
PPO ---- 1 metric train_eval_f1 = 0.15967488201363397
PPO ---- 1 metric train_eval_time = 0.0
PPO ---- 1 metric incorrect_ood_train = 0.0
PPO ---- 1500 metric pg_loss = -0.0016347655194501082
PPO ---- 1500 metric value_loss = 0.4859256957570712
PPO ---- 1500 metric entropy_loss = 0.6610436387260755
PPO ---- 2 metric test_eval_f1 = 0.1661522633744856
PPO ---- 2 metric test_eval_time = 0.0
PPO ---- 2 metric incorrect_ood_test = 0.0
PPO ---- 2 metric avg_reward = -0.09694408895678905
PPO ---- 2 metric avg_length = 1.3585769194285215
PPO ---- 3000 metric pg_loss = -0.00017780265165492893
PPO ---- 3000 metric value_loss = 0.4889269459644953
PPO ---- 3000 metric entropy_loss = 0.5835638108452161
PPO ---- 3 metric test_eval_f1 = 0.1661522633744856
PPO ---- 3 me

KeyboardInterrupt: 

In [187]:
# Evaluate the current agent on the test dataset. This logs test_eval_f1,
# test_eval_time and incorrect_ood_test, and replaces trainer.best when the
# macro-F1 matches or beats the stored score.
trainer.evalTest()

PPO ---- 12 metric test_eval_f1 = 0.35914683940788333
PPO ---- 12 metric test_eval_time = 4.888388148432256
PPO ---- 12 metric incorrect_ood_test = 0.0
