In [2]:
# import pandas as pd
# import numpy as np
# import pickle
# import warnings
# import copy
# import random
# import matplotlib.pyplot as plt
# from collections import Counter
# import joblib

In [None]:
from ipynb.fs.full.Adversarial_NIDS_Environemnt import *

In [59]:
# %matplotlib inline
import math
from collections import namedtuple
from itertools import count
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torch.nn import init

In [60]:
# Positional indices 0 .. len(df)-1 (df is provided by the environment notebook import)
list_ind = list(range(len(df)))

In [61]:
## The neural network class: input size = dimension of the state space (n_nodes+1), output layer size = 11 (one output per action)

class DQN(nn.Module):
    """Fully connected Q-network: state_dim -> 256 -> 128 -> 64 -> n_actions.

    Args:
        state_dim: Number of input features after flattening each sample.
        n_actions: Width of the output layer, i.e. how many values are
            produced per sample. Defaults to 11, the size that was
            previously hard-coded.
    """

    def __init__(self, state_dim, n_actions=11):
        super().__init__()
        self.fc1 = nn.Linear(in_features=state_dim, out_features=256)
        # Only the first layer's weights get an explicit Kaiming-normal init;
        # the remaining layers keep nn.Linear's default initialisation.
        init.kaiming_normal_(self.fc1.weight, mode='fan_in')
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.out = nn.Linear(in_features=64, out_features=n_actions)

    def forward(self, t):
        """Return one row of n_actions raw outputs per input sample.

        Args:
            t: Tensor with a leading batch dimension. Everything after the
                first dimension is flattened, so each sample must contain
                state_dim elements in total. A 1-D tensor is rejected by
                flatten(start_dim=1).

        Returns:
            Tensor of shape (batch, n_actions); no activation is applied to
            the output layer.
        """
        t = t.flatten(start_dim=1)
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        t = F.relu(self.fc3(t))
        return self.out(t)

In [62]:
## An Experience class to store the state, action, next state, and reward of a single experience
# Immutable record for one transition; field order is state, action, next_state, reward
Experience = namedtuple('Experience', ['state', 'action', 'next_state', 'reward'])

In [63]:
## The replay memory class to store and sample experiences
class ReplayMemory():
    """Fixed-capacity buffer of experiences with uniform random sampling.

    Experiences are appended until `capacity` is reached; after that each new
    experience overwrites the oldest stored one (ring-buffer behaviour).

    Args:
        capacity: Maximum number of experiences kept. Must be positive.

    Raises:
        ValueError: If `capacity` is not positive. Without this check a zero
            capacity only fails later, inside `push`, with a
            ZeroDivisionError, and a negative one with an IndexError.
    """

    def __init__(self, capacity):
        if capacity <= 0:
            raise ValueError(
                "ReplayMemory capacity must be positive, got {!r}".format(capacity)
            )
        self.capacity = capacity
        self.memory = []
        self.push_count = 0  # Total number of experiences pushed so far

    def push(self, experience):
        """Store an experience, overwriting the oldest one once the buffer is full."""
        if len(self.memory) < self.capacity:
            self.memory.append(experience)
        else:
            # push_count % capacity cycles through the slots in insertion
            # order, so the slot being overwritten always holds the oldest entry.
            self.memory[self.push_count % self.capacity] = experience
        self.push_count += 1

    def sample(self, batch_size):
        """Return `batch_size` distinct stored experiences chosen at random.

        Raises:
            ValueError: Propagated from random.sample when `batch_size`
                exceeds the number of stored experiences; guard the call with
                `can_provide_sample`.
        """
        return random.sample(self.memory, batch_size)

    def can_provide_sample(self, batch_size):
        """Return True when at least `batch_size` experiences are stored."""
        return len(self.memory) >= batch_size

In [64]:
# The Epsilon Greedy Strategy Class
class EpsilonGreedyStrategy():
    """Exponentially decaying exploration schedule.

    The rate equals `start` at step 0 and approaches `end` as the step count
    grows, at a speed set by `decay`.
    """

    def __init__(self, start, end, decay):
        self.start, self.end, self.decay = start, end, decay

    def get_exploration_rate(self, current_step):
        """Return end + (start - end) * exp(-current_step * decay)."""
        decay_factor = math.exp(-1. * current_step * self.decay)
        return self.end + (self.start - self.end) * decay_factor

In [65]:
# Defining the agent class which needs the epsilon greedy strategy and a device to initialize
class Agent():
    """Epsilon-greedy action selector.

    Holds the exploration strategy, the device on which returned action
    tensors are placed, and a counter of how many actions have been selected.
    """

    def __init__(self, strategy, device):
        self.current_step = 0
        self.strategy = strategy
        self.device = device

    def select_action(self, state_, state, policy_net):
        """Pick an action by exploring or exploiting.

        Args:
            state_: Passed unchanged to `random_action` when exploring.
            state: Passed to `policy_net` when exploiting.
            policy_net: Callable whose output is arg-maxed over dim 1.

        Returns:
            A tensor on `self.device`: the single random action when
            exploring, otherwise the arg-max index for each row of the
            network output.
        """
        epsilon = self.strategy.get_exploration_rate(self.current_step)
        self.current_step += 1

        exploring = epsilon > random.random()
        if not exploring:
            # Exploitation: inference only, so gradient tracking is switched off
            with torch.no_grad():
                return policy_net(state).argmax(dim=1).to(self.device)

        # Exploration: defer to the externally defined random_action helper
        chosen = random_action(state_)
        return torch.tensor([chosen]).to(self.device)

In [66]:
## Initializing parameter values
batch_size = 256          # presumably the replay minibatch size -- confirm in the training loop
gamma = 0.8               # presumably the discount factor -- confirm in the training loop
eps_start = 1             # exploration rate at step 0 (passed to EpsilonGreedyStrategy)
eps_end = 0.01            # value the exploration rate decays toward
eps_decay = 2.475e-05     # exponential decay constant applied per agent step
target_update = 10        # presumably how often the target network is refreshed -- confirm in the training loop
memory_size = 100_000     # capacity passed to ReplayMemory
lr = 1e-3                 # learning rate passed to the Adam optimizer
num_episodes = 50_000     # presumably the number of training episodes -- confirm in the training loop
episode_length = 30       # presumably the number of steps per episode -- confirm in the training loop
# episode_duration = 400000
# high_val = 19
# adv_startnode = random.randint(0,n_nodes)

In [67]:
# Display whether PyTorch can see a CUDA device (the result is shown as the cell output)
torch.cuda.is_available()

True

In [68]:
## Initializing the device
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [69]:
## Creating an instance of the epsilon greedy strategy class
strategy = EpsilonGreedyStrategy(start=eps_start, end=eps_end, decay=eps_decay)

In [70]:
## Creating an instance of the agent and the replay memory
agent = Agent(strategy=strategy, device=device)
memory = ReplayMemory(capacity=memory_size)

In [71]:
# Input width handed to DQN (becomes in_features of its first linear layer).
# NOTE(review): 1525 is hard-coded -- presumably the flattened state size of the
# environment; confirm against the environment notebook.
state_dim = 1525

In [72]:
## Creating an instance of the DQN class as policy network and cloning the same network as target network
# Two independently initialised networks, built in order: policy first, then target
policy_net, target_net = (DQN(state_dim).to(device) for _ in range(2))

In [73]:
## Copy the policy network's parameters into the target network so both start with identical weights
target_net.load_state_dict(policy_net.state_dict())
## Put the target network in evaluation mode; eval() returns the module, which is what this cell displays
target_net.eval()

DQN(
  (fc1): Linear(in_features=1525, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (out): Linear(in_features=64, out_features=11, bias=True)
)

In [74]:
# Adam updates only the policy network's parameters
optimizer = optim.Adam(policy_net.parameters(), lr=lr)

In [75]:
## Changes the data stored in experience class to tensor

def extract_tensors(experiences):
    """Turn a sequence of Experience records into four concatenated tensors.

    Each field of every experience is expected to be a tensor accepted by
    torch.cat, which joins them along dimension 0.

    Returns:
        Tuple (states, actions, rewards, next_states). Note that rewards come
        before next_states, which differs from the Experience field order.
    """
    # Transpose: a batch of Experiences becomes one Experience of per-field tuples
    states, actions, next_states, rewards = Experience(*zip(*experiences))

    return (
        torch.cat(states),
        torch.cat(actions),
        torch.cat(rewards),
        torch.cat(next_states),
    )

In [76]:
class QValues():
    """Static helpers that compute current and next-state Q-values for a batch."""

    # Default device, kept for backward compatibility with code reading
    # QValues.device; get_next no longer allocates on it (see below).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    @staticmethod
    def get_current(policy_net, states, actions):
        """Return the network output for the action taken in each state.

        Args:
            policy_net: Callable mapping `states` to a (batch, n_outputs) tensor.
            states: Batch passed to `policy_net`.
            actions: 1-D integer tensor of column indices, one per row.

        Returns:
            Tensor of shape (batch, 1) holding the selected output per row.
        """
        return policy_net(states).gather(dim=1, index=actions.unsqueeze(-1))

    @staticmethod
    def get_next(target_net, next_states):
        """Return the maximum target-network output for each next state.

        A next state is treated as final when the largest element of its
        flattened representation equals 1; final states get a value of 0.

        Args:
            target_net: Callable mapping states to a (batch, n_outputs) tensor.
            next_states: Batch of next states with a leading batch dimension.

        Returns:
            1-D tensor of length batch, placed on the same device as
            `next_states`.
        """
        final_state_locations = next_states.flatten(start_dim=1) \
            .max(dim=1)[0].eq(1).type(torch.bool)
        non_final_state_locations = ~final_state_locations
        non_final_states = next_states[non_final_state_locations]
        batch_size = next_states.shape[0]
        # Allocate on the input's device rather than the class-level default so
        # the masked assignment below cannot hit a device mismatch.
        values = torch.zeros(batch_size, device=next_states.device)
        # The target network is used for inference only; skip graph construction.
        with torch.no_grad():
            best_next = target_net(non_final_states).max(dim=1)[0]
        values[non_final_state_locations] = best_next
        return values