# Experiment 01

### Training on Google Collab 

In [17]:
# Mount your google drive in google colab
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Go to base directory
%cd /content/drive/MyDrive/University/Master-Thesis/Code

### Training on Local Machine

In [1]:
import os

original_working_directory_path = os.getcwd()
print("The original working directory is {0}".format(os.getcwd()))

def to_original_working_directory():
    os.chdir(original_working_directory_path)
    print(f"Changed to original working directory {original_working_directory_path}")

The original working directory is /Users/jankreischer/Library/Mobile Documents/com~apple~CloudDocs/Master-Thesis/Code/experiments/jans_experiment_01


In [2]:
def to_root_working_directory():
    root_working_directory_path = os.path.join(original_working_directory_path, "../..")
    os.chdir(root_working_directory_path)
    print(f"Changed to root working directory {os.getcwd()}")

In [3]:
to_root_working_directory()

Changed to root working directory /Users/jankreischer/Library/Mobile Documents/com~apple~CloudDocs/Master-Thesis/Code


In [209]:
import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [213]:
import torch

device = None
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    print ("MPS device not found.")

print(device)

mps


In [4]:
def seed_random():
    random.seed(42)
    torch.random.manual_seed(42)
    np.random.seed(42)

In [210]:
# Check that MPS is available
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")

else:
    mps_device = torch.device("mps")

    # Create a Tensor directly on the mps device
    x = torch.ones(5, device=mps_device)
    # Or
    x = torch.ones(5, device="mps")

    # Any operation happens on the GPU
    y = x * 2

    # Move your model to mps just like any other device
    model = YourFavoriteNet()
    model.to(mps_device)

    # Now every call runs on the GPU
    pred = model(x)

NameError: name 'YourFavoriteNet' is not defined

### 1. Dependencies

In [5]:
# Standard Dependencies
import sys
import os
import numpy as np
from time import time

In [6]:
# Global Dependencies
from src.custom_types import Behavior, MTDTechnique
from src.data_provider import DataProvider
from src.agent import Agent
#from src.simulation_engine import SimulationEngine
from src.evaluation_utils import plot_learning, seed_random, get_pretrained_agent, evaluate_agent_on_afterstates

  from .autonotebook import tqdm as notebook_tqdm


In [70]:
from tabulate import tabulate

def evaluate_agent(agent: Agent, test_data):
    # check predictions with learnt dqn
    agent.online_net.eval()
    res_dict = {}
    objective_dict = {}
    with torch.no_grad():
        for b, d in test_data.items():
            if b != Behavior.NORMAL:
                cnt_corr = 0
                cnt = 0
                for state in d:
                    action = agent.take_greedy_action(state[:-1])
                    if b in supervisor_map[action]:
                        cnt_corr += 1
                    cnt += 1
                res_dict[b] = (cnt_corr, cnt)

            for i in range(len(actions)):
                if b in supervisor_map[i]:
                    objective_dict[b] = actions[i]
    labels = ("Behavior", "Accuracy", "Objective")
    results = []

    for b, t in res_dict.items():
        results.append((b.value, f'{(100 * t[0] / t[1]):.2f}%', objective_dict[b].value))
    print(tabulate(results, headers=labels, tablefmt="orgtbl"))

In [8]:
import matplotlib.pyplot as plt

def plot_learning_curve(title, returns, epsilons):
    #x = range(1, len(returns)+1)
    x = [i + 1 for i in range(N_EPISODES)]
    fig = plt.figure()
    #fig.title(title)
    
    ax = fig.add_subplot(111, label="1")
    ax2 = fig.add_subplot(111, label="2", frame_on=False)
    ax.set_title(title)
    
    ax.plot(x, epsilons, color="C0")
    ax.set_xlabel("Episode", color="C0")
    ax.set_ylabel("Epsilon", color="C0")
    ax.tick_params(axis='x', colors="C0")
    ax.tick_params(axis='y', colors="C0")

    N = len(returns)
    running_avg = np.empty(N)
    for t in range(N):
        running_avg[t] = np.mean(returns[max(0, t - 20):(t + 1)])

    ax2.scatter(x, running_avg, color="C1", s=2 ** 2)
    # ax2.xaxis.tick_top()
    ax2.axes.get_xaxis().set_visible(False)
    ax2.yaxis.tick_right()
    # ax2.set_xlabel('x label 2', color="C1")
    ax2.set_ylabel('Score', color="C1")
    # ax2.xaxis.set_label_position('top')
    ax2.yaxis.set_label_position('right')
    # ax2.tick_params(axis='x', colors="C1")
    ax2.tick_params(axis='y', colors="C1")

    #plt.savefig(filename)

### 2. Hyperparameters

In [9]:
# Hyperparams

# Alternatively two classes ServerHyperparameter and ClientHyperparameter

class Hyperparameter:
    # Server Params
    NR_CLIENTS = 5
    NR_TRAINING_ROUNDS = 2
    # The number of epochs per training round is determined implicitely
    
    # Client Params
    GAMMA = 0.1 #0.99 # discount factor for future rewards
    BATCH_SIZE = 100
    BUFFER_SIZE = 500
    MIN_REPLAY_SIZE = 100
    # Epsilon determining Exploration Exploitation Trade Off
    EPSILON_START = 1.0
    EPSILON_DEC = 1e-4
    EPSILON_END = 0.01

    TARGET_UPDATE_FREQ = 100
    LEARNING_RATE = 1e-4
    N_EPISODES = 10000
    LOG_FREQ = 100
    DIMS = 20
    PI = 3

In [10]:
# Hyperparams

GAMMA = 0.1 #0.99 # discount factor for future rewards
BATCH_SIZE = 100
N_EPISODES = 10000

NR_ROUNDS = 10
NR_EPISODES_PER_ROUND = 1000

BUFFER_SIZE = 500
MIN_REPLAY_SIZE = 100
# Epsilon determining Exploration Exploitation Trade Off
EPSILON_START = 1.0
EPSILON_DEC = 5e-3 # 0.005
EPSILON_END = 0.01

TARGET_UPDATE_FREQ = 100
LEARNING_RATE = 1e-4
LOG_FREQ = 100
DIMS = 20
PI = 3

### 3. Training

#### Tasks
##### Done
- Why are we not using softmax in the last layer of the agent? [o]
- Check timos epsilon decay strategy [✓]
- Introduce verbosity levels [✓]

##### Open
- Check the format of training data
- Validate the copying of the parameters works properly
- How often do i have to initialze the replay memory?
- Make sure that the weights of the DQN are not being reset by the training
- If you really want to make this work online then the right aggregation strategy is key. Maybe ensemble models would be good.
- Check different federated aggregation strategies for class distinct problems
- Parallelize the training
- Move the training to the GPU.
- Split the dataset presented to each client
- Make the threadding more efficient
- Add softmax to the last layer
- Check how the accuracy/ f1 score was computed by timo
- Check different weight aggregation strategies []
- Check the weighting strategy that Timo used
- Ipmlemtne Multiclass Imbalance Degree (MID) Metric
- Print a graph with overall attack detection accuracy (How many got mitigated and how many did not get mitigated)
- Add f1 score to the model evaluation output
- Check which loss we are using (purpose of binary cross entropy loss)

In [22]:
from typing import Dict, Tuple, List
from src.custom_types import Behavior, MTDTechnique, actions, supervisor_map
import numpy as np
import random


# handles the supervised, online-simulation of episodes
class SensorEnvironment:

    def __init__(self, train_data: Dict[Behavior, np.ndarray] = None):
        print("Recognized Behaviours")
        print(train_data.keys())
        self.train_data = train_data
        self.current_state: np.array = None
        self.observation_space_size: int = len(self.train_data[Behavior.NORMAL][0][:-1])
        self.actions: List[int] = [i for i in range(len(actions))]

    # Returns a randomly selected attack state with non normal behaviour.
    def sample_random_attack_state(self):
        """i.e. for starting state of an episode,
        (with replacement; it is possible that the same sample is chosen multiple times)"""
        rb = random.choice([b for b in self.train_data.keys() if b != Behavior.NORMAL])
        attack_data = self.train_data[rb]
        return attack_data[np.random.randint(attack_data.shape[0], size=1), :]

    # Return random sample with specified behaviour
    def sample_behavior(self, b: Behavior):
        behavior_data = self.train_data[b]
        return behavior_data[np.random.randint(behavior_data.shape[0], size=1), :]

    def step(self, action: int):
        current_behavior = self.current_state.squeeze()[-1]

        if current_behavior in supervisor_map[action]:
            # print("correct mtd chosen according to supervisor")
            new_state = self.sample_behavior(Behavior.NORMAL)
            reward = self.calculate_reward(True)
            isTerminalState = True
        else:
            # print("incorrect mtd chosen according to supervisor")
            new_state = self.sample_behavior(current_behavior)
            reward = self.calculate_reward(False)
            isTerminalState = False

        self.current_state = new_state
        return new_state, reward, isTerminalState

    def reset(self):
        self.current_state = self.sample_random_attack_state()
        return self.current_state

    # TODO: possibly adapt to distinguish between MTDs that are particularly wasteful in case of wrong deployment
    def calculate_reward(self, success):
        """
        this method can be refined to distinguish particularly wasteful MTDs (i.e. Dirtrap penalized harder than rootkit sanitization)
        """
        if success:
            return 1
        else:
            return -1

In [12]:
from typing import Dict
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
from collections import deque
import random


class DeepQNetwork(nn.Module):
    def __init__(self, lr, input_dims, fc1_dims, fc2_dims,
                 n_actions):
        super(DeepQNetwork, self).__init__()
        self.input_dims = input_dims
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims
        self.n_actions = n_actions

        # Layers
        self.fc1 = nn.Linear(self.input_dims, self.fc1_dims)
        self.fc2 = nn.Linear(self.fc1_dims, self.fc2_dims)
        self.fc3 = nn.Linear(self.fc2_dims, self.n_actions)
        #self.softmax = torch.nn.Softmax()
        # Why are we not using softmax in the last layer

        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        self.loss = nn.MSELoss()
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x))
        return x


class Agent:
    def __init__(self, agent_id: int, input_dims: int, n_actions, batch_size,
                 lr, gamma, epsilon, eps_end=0.02, eps_dec=1e-4, buffer_size=100000):
        self.agent_id = agent_id
        self.gamma = gamma
        self.epsilon = epsilon
        self.eps_min = eps_end
        self.eps_dec = eps_dec
        self.lr = lr
        self.action_space = [i for i in range(n_actions)]

        self.episode_action_memory = set()
        self.replay_buffer = deque(maxlen=buffer_size)
        self.reward_buffer = deque([0.0], maxlen=100)  # for printing progress

        self.batch_size = batch_size

        self.online_net = DeepQNetwork(lr, n_actions=n_actions,
                                       input_dims=input_dims,
                                       fc1_dims=60, fc2_dims=30)
        self.target_net = DeepQNetwork(lr, n_actions=n_actions,
                                       input_dims=input_dims,
                                       fc1_dims=60, fc2_dims=30)
        self.target_net.load_state_dict(self.online_net.state_dict())

    def choose_action(self, observation):
        try:
            if np.random.random() > self.epsilon:
                #
                action = self.take_greedy_action(observation)
                if action in self.episode_action_memory:
                    action = np.random.choice(list(set(self.action_space).difference(self.episode_action_memory)))
            else:
                action = np.random.choice(list(set(self.action_space).difference(self.episode_action_memory)))
            self.episode_action_memory.add(action)
        except ValueError:
            return -1
        return action

    def take_greedy_action(self, observation):
        state = torch.from_numpy(observation.astype(np.float32)).to(self.online_net.device)
        actions = self.online_net.forward(state)
        action = torch.argmax(actions).item()
        return action

    def learn(self):
        # init data batch from memory replay for dqn
        transitions = random.sample(self.replay_buffer, self.batch_size)
        b_obses = np.stack([t[0].astype(np.float32).squeeze(0) for t in transitions], axis=0)
        b_actions = np.asarray([t[1] for t in transitions]).astype(np.int64)
        b_rewards = np.asarray([t[2] for t in transitions]).astype(np.int16)
        b_new_obses = np.stack([t[3].astype(np.float32).squeeze(0) for t in transitions], axis=0)
        b_dones = np.asarray([t[4] for t in transitions]).astype(np.int16)
        t_obses = torch.from_numpy(b_obses).to(self.target_net.device)
        t_actions = torch.from_numpy(b_actions).to(self.target_net.device)
        t_rewards = torch.from_numpy(b_rewards).to(self.target_net.device)
        t_new_obses = torch.as_tensor(b_new_obses).to(self.target_net.device)
        t_dones = torch.as_tensor(b_dones).to(self.target_net.device)

        # compute targets
        target_q_values = self.target_net(t_new_obses)
        max_target_q_values = torch.max(target_q_values, dim=1)[0]

        targets = (t_rewards + self.gamma * (1 - t_dones) * max_target_q_values).unsqueeze(1)

        # compute loss
        q_values = self.online_net(t_obses)
        taken_action_q_values = torch.gather(input=q_values, dim=1, index=t_actions.unsqueeze(1))

        loss = self.online_net.loss(taken_action_q_values, targets).to(self.target_net.device)

        # gradient descent
        self.online_net.optimizer.zero_grad()
        loss.backward()
        self.online_net.optimizer.step()

        # epsilon decay
        self.epsilon = self.epsilon - self.eps_dec if self.epsilon > self.eps_min else self.eps_min

    def update_target_network(self):
        self.target_net.load_state_dict(self.online_net.state_dict())

    def get_weights(self):
        print(f"SERVER <--- WEIGHTS --- AGENT {self.agent_id}")
        return copy.deepcopy(self.target_net.state_dict())
    
    def update_weights(self, model_params):
        print(f"SERVER --- WEIGHTS ---> AGENT {self.agent_id}")
        self.online_net.load_state_dict(copy.deepcopy(model_params))
        self.target_net.load_state_dict(copy.deepcopy(model_params))
        
    '''
    def save_agent_state(self, n: int, directory: str):
        torch.save({
            'online_net_state_dict': self.online_net.state_dict(),
            'target_net_state_dict': self.target_net.state_dict(),
            'batch_size': self.batch_size,
            'replay_buffer': self.replay_buffer,
            'reward_buffer': self.reward_buffer,
            'action_space': self.action_space,
            'gamma': self.gamma,
            'eps': self.epsilon,
            'eps_min': self.eps_min,
            'eps_dec': self.eps_dec,
            'lr': self.lr
        }, f"{directory}/trained_models/agent_{n}.pth")

        #torch.save(self.online_net.state_dict(), f"offline_prototype_2_raw_behaviors/trained_models/online_net_{n}.pth")
        #torch.save(self.target_net.state_dict(), f"offline_prototype_2_raw_behaviors/trained_models/target_net_{n}.pth")
    '''

In [13]:
import torch
from torch import nn, optim
import copy
#from torch.utils.data import DataLoader as DataProvider


class Client:
        # memory buffer is influenced by env.step -> resetting to previous action, which may result in unbalanced training
    def __init__(self, client_id: int, agent: Agent, environment: SensorEnvironment):
        self.client_id = client_id
        self.agent = agent
        self.environment = environment
        self.episode_returns = [] 
        self.eps_history = []
    
    '''
    test
    '''
    def init_replay_memory(self, min_size):
        obs = self.environment.reset()
        episode_action_memory = []
        i = 0
        while i < min_size:
            try:
                action = np.random.choice(list({0,1,2,3}.difference(episode_action_memory)))
                episode_action_memory.append(action)
            except ValueError:
                obs = self.environment.reset()
                episode_action_memory = []
                # results in slightly less entries than min_size
                print("exhausted all mtd techniques")
                continue
            i += 1

            new_obs, reward, done = self.environment.step(action)
            idx1 = -1 if obs[0, -1] in Behavior else -2
            idx2 = -1 if new_obs[0, -1] in Behavior else -2
            transition = (obs[:, :idx1], action, reward, new_obs[:, :idx2], done)
            self.agent.replay_buffer.append(transition)

            obs = new_obs
            if done:
                obs = self.environment.reset()
                episode_action_memory = []

    def train_agent(self, num_episodes, t_update_freq, verbose=False):
        episode_returns, eps_history = [], []
        step = 0
        for i in range(num_episodes):
            episode_return = 0
            episode_steps = 0
            done = False
            obs = self.environment.reset()
            while not done:
                idx1 = -1 if obs[0, -1] in Behavior else -2
                action = self.agent.choose_action(obs[:, :idx1])
                if action == -1:
                    print("Agent exhausted all MTD techniques upon behavior: ", obs[0, -1])
                    self.agent.episode_action_memory = set()
                    done = True
                    continue

                new_obs, reward, done = self.environment.step(action)
                idx2 = -1 if new_obs[0, -1] in Behavior else -2
                episode_return += reward
                self.agent.replay_buffer.append((obs[:, :idx1], action, reward,
                                            new_obs[:, :idx2], done))
                self.agent.reward_buffer.append(reward)
                if done:
                    self.agent.episode_action_memory = set()

                self.agent.learn()
                obs = new_obs

                episode_steps += 1
                # update target network
                step += 1
                if step % t_update_freq == 0:
                    self.agent.update_target_network()

                # if step % LOG_FREQ == 0:
                # print("Episode: ", i, "Step: ", step, ", Avg Reward: ", np.mean(agent.reward_buffer), "epsilon: ", agent.epsilon)

            self.episode_returns.append(episode_return / episode_steps)
            avg_episode_return = np.mean(episode_returns[-10:])
            self.eps_history.append(self.agent.epsilon)
            
            if verbose:
                print('| agent %d' % self.agent.agent_id,
                  '| episode ', i, '| episode_return %.2f' % episode_returns[-1],
                  '| average episode_return %.2f' % avg_episode_return,
                  '| epsilon %.2f' % self.agent.epsilon)
            #if i >= num_episodes - 6:
                #print(episode_returns[-10:])
                
            #self.episode_returns+=episode_returns
            #self.eps_history+=eps_history
        return episode_returns, eps_history
        
    def get_training_summary(self):
        return self.episode_returns, self.eps_history
    
    def receive_weights(self, model_params):
        """ Receive aggregated parameters, update model """
        #self.agent.load_state_dict(copy.deepcopy(model_params))
        self.agent.update_weights(model_params)
        
    def get_weights(self):
        return self.agent.get_weights()

In [68]:
import torch
from torch import nn
import copy
from typing import List, Dict
import threading
import numpy as np

class Server:
    def __init__(self, global_agent: Agent, test_data, nr_rounds = NR_ROUNDS, parallelized=False, verbose=True):
        self.clients = []
        self.global_agent = global_agent
        self.test_data = test_data
        self.nr_rounds = nr_rounds
        self.parallelized = parallelized
        self.verbose = verbose
      
    def aggregate_weights(self):
        print("=== AGGREGATING WEIGHTS ===")
        client_params = {client.client_id: client.get_weights() for client in self.clients}
        new_params = copy.deepcopy(next(iter(client_params.values())))  # names
        for name in new_params:
            new_params[name] = torch.zeros(new_params[name].shape)
        for client_id, params in client_params.items():
            client_weight = 1/len(self.clients)
            for name in new_params:
                new_params[name] += params[name] * client_weight  # averaging
        #set new parameters to global model
        self.global_agent.update_weights(new_params)
        #print(new_params)
        return new_params
         
    def broadcast_weights(self):
        """ Send to all clients """
        for client in self.clients:
            client.receive_weights(self.global_agent.get_weights())

    def add_client(self, client: Client):
        self.clients.append(client)
      
    def training_dist(self, verbose=False):
        for nr_round in range(self.nr_rounds):
            if self.verbose:
                print(f">>> SERVER TRAINING ROUND {nr_round + 1}/{self.nr_rounds} <<<")
            for client in self.clients:
                print(f"> AGENT {client.client_id} TRAINING ROUND {nr_round + 1}/{self.nr_rounds} <")
                client.receive_weights(self.global_agent.get_weights())
                
                if self.parallelized:
                    # Parallel training
                    threads = []
                    for client in self.clients:
                        #client.agent.model.share_memory()
                        t = threading.Thread(target=Client.train_agent, args=(client, 1000, 100))
                        t.start()
                        threads.append(t)
                    for t in threads:
                        t.join()
                else:
                    # Sequential training
                    for client in self.clients:
                        client.train_agent(1000, 100, verbose=verbose)
            
            self.aggregate_weights()
            print(f">> EVALUATION TRAINING ROUND {nr_round + 1}/{self.nr_rounds} <<")
            for client in self.clients:
                print(f"AGENT {client.client_id}")
                evaluate_agent(client.agent, self.test_data)
            # When every client is done training then we can aggregate weights
            print(f"GLOBAL AGENT")
            evaluate_agent(self.global_agent, self.test_data)
            
    def plot_learning_curves(self):
        for client in self.clients:
            episode_returns, eps_history = client.get_training_summary()
            plot_learning_curve(f"{client.client_id}", episode_returns, eps_history)
     
    '''
    def compute_acc(self):
        self.global_model.eval()

        test_loss = 0
        nr_correct = 0
        len_test_data = 0
        for attributes, labels in self.test_data:
            labels.to(self.device)
            features = attributes.float().to(self.device)
            outputs = self.global_model(features).to(self.device)
            # accuracy
            if self.data == 'MNIST' or self.data == 'FEMNIST':
                pred_labels = torch.argmax(outputs, dim=1).to(self.device)
            elif self.data == 'MED':
                pred_labels = torch.round(outputs).to(self.device)
            else:
                raise NotImplementedError
            nr_correct += torch.eq(pred_labels.to(self.device), labels.to(self.device)).type(torch.uint8).sum().item()
            len_test_data += len(attributes)
            # loss
            test_loss += self.criterion(outputs.to(self.device), labels.to(self.device))

        return nr_correct / len_test_data, test_loss.item()
    '''

In [15]:
'''
num = 0
#agent.save_agent_state(0, f"{experiment_base_dir}")

x = [i + 1 for i in range(N_EPISODES)]
filename = f"{experiment_base_dir}/mtd_agent_p1.pdf"
plot_learning(x, episode_returns, eps_history, filename)

# check predictions with dqn from trained and stored agent
pretrained_agent = get_pretrained_agent(path=f"{experiment_base_dir}/trained_models/agent_{num}.pth",
                                        input_dims=environment_01.observation_space_size, n_actions=len(environment_01.actions),
                                        buffer_size=BUFFER_SIZE)
# check predictions with learnt dqn
evaluate_agent(pretrained_agent, test_data=test_data)
ha
# check scaling if uncommented
# print("evaluate p1 agent on 'real' decision and afterstate data:")
# dtrain, dtest, atrain, atest = DataProvider.get_reduced_dimensions_with_pca_ds_as(DIMS,
#                                                                                   dir=f"{experiment_base_dir}/")
# evaluate_agent(agent=pretrained_agent, test_data=dtest)
# evaluate_agent_on_afterstates(agent=pretrained_agent, test_data=atest)
'''

'\nnum = 0\n#agent.save_agent_state(0, f"{experiment_base_dir}")\n\nx = [i + 1 for i in range(N_EPISODES)]\nfilename = f"{experiment_base_dir}/mtd_agent_p1.pdf"\nplot_learning(x, episode_returns, eps_history, filename)\n\n# check predictions with dqn from trained and stored agent\npretrained_agent = get_pretrained_agent(path=f"{experiment_base_dir}/trained_models/agent_{num}.pth",\n                                        input_dims=environment_01.observation_space_size, n_actions=len(environment_01.actions),\n                                        buffer_size=BUFFER_SIZE)\n# check predictions with learnt dqn\nevaluate_agent(pretrained_agent, test_data=test_data)\nha\n# check scaling if uncommented\n# print("evaluate p1 agent on \'real\' decision and afterstate data:")\n# dtrain, dtest, atrain, atest = DataProvider.get_reduced_dimensions_with_pca_ds_as(DIMS,\n#                                                                                   dir=f"{experiment_base_dir}/")\n# evaluate_a

In [44]:
experiment_base_dir = "experiments/jans_experiment_01"

seed_random()
start = time()

# read in all preprocessed data for a simulated, supervised environment to sample from
train_data, test_data, _ = DataProvider.get_scaled_train_test_split(scaling_minmax=True, scale_normal_only=True)
# train_data, test_data = DataProvider.get_reduced_dimensions_with_pca(DIMS, pi=PI, normal_only=True)
environment_01 = SensorEnvironment(train_data)
environment_02 = SensorEnvironment(train_data)

#print(type(train_data))
#print(train_data.keys())
#print("---")
#print(len(train_data[Behavior.NORMAL]))
#print(train_data[Behavior.NORMAL])
#print("---")
#print(type(test_data))

# The input size is the number of features 
#print("state size: ", environment_01.observation_space_size)

global_agent = Agent(0, input_dims=environment_02.observation_space_size, n_actions=len(environment_02.actions), buffer_size=BUFFER_SIZE,
                batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

agent_01 = Agent(1, input_dims=environment_01.observation_space_size, n_actions=len(environment_01.actions), buffer_size=BUFFER_SIZE,
              batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

agent_02 = Agent(2, input_dims=environment_02.observation_space_size, n_actions=len(environment_02.actions), buffer_size=BUFFER_SIZE,
                batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

server = Server(global_agent, nr_rounds=2, parallelized=False, verbose=True)

client_01 = Client(1, agent_01, environment_01)
client_02 = Client(2, agent_02, environment_02)
# initialize memory replay buffer (randomly)
client_01.init_replay_memory(MIN_REPLAY_SIZE)
client_02.init_replay_memory(MIN_REPLAY_SIZE)

server.add_client(client_01)
server.add_client(client_02)

server.training_dist(verbose=False)

# main training
#episode_returns, eps_history = client_01.learn_agent_offline(num_episodes=N_EPISODES, t_update_freq=TARGET_UPDATE_FREQ)

end = time()
print("Total training time: ", end - start)

/Users/jankreischer/Library/Mobile Documents/com~apple~CloudDocs/Master-Thesis/Code
>>> SERVER TRAINING ROUND 1 <<<
> AGENT 1 TRAINING ROUND 1 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 1


  return _methods._mean(a, axis=axis, dtype=dtype,


> AGENT 2 TRAINING ROUND 1 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 2
=== AGGREGATING WEIGHTS ===
SERVER <--- WEIGHTS --- AGENT 1
SERVER <--- WEIGHTS --- AGENT 2
SERVER --- WEIGHTS ---> AGENT 0
>>> SERVER TRAINING ROUND 2 <<<
> AGENT 1 TRAINING ROUND 2 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 1
> AGENT 2 TRAINING ROUND 2 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 2
=== AGGREGATING WEIGHTS ===
SERVER <--- WEIGHTS --- AGENT 1
SERVER <--- WEIGHTS --- AGENT 2
SERVER --- WEIGHTS ---> AGENT 0
Total training time:  41.31780695915222


In [98]:
experiment_base_dir = "experiments/jans_experiment_01"

seed_random()
start = time()

# read in all preprocessed data for a simulated, supervised environment to sample from
train_data, test_data, _ = DataProvider.get_scaled_train_test_split(scaling_minmax=True, scale_normal_only=True)
# train_data, test_data = DataProvider.get_reduced_dimensions_with_pca(DIMS, pi=PI, normal_only=True)
environment_01 = SensorEnvironment(train_data)
environment_02 = SensorEnvironment(train_data)

#print(type(train_data))
#print(train_data.keys())
#print("---")
#print(len(train_data[Behavior.NORMAL]))
#print(train_data[Behavior.NORMAL])
#print("---")
#print(type(test_data))

# The input size is the number of features 
#print("state size: ", environment_01.observation_space_size)

global_agent = Agent(0, input_dims=environment_02.observation_space_size, n_actions=len(environment_02.actions), buffer_size=BUFFER_SIZE,
                batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

agent_01 = Agent(1, input_dims=environment_01.observation_space_size, n_actions=len(environment_01.actions), buffer_size=BUFFER_SIZE,
              batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

agent_02 = Agent(2, input_dims=environment_02.observation_space_size, n_actions=len(environment_02.actions), buffer_size=BUFFER_SIZE,
                batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

server = Server(global_agent, nr_rounds=2, parallelized=True, verbose=True)

client_01 = Client(1, agent_01, environment_01)
client_02 = Client(2, agent_02, environment_02)
# initialize memory replay buffer (randomly)
client_01.init_replay_memory(MIN_REPLAY_SIZE)
client_02.init_replay_memory(MIN_REPLAY_SIZE)

server.add_client(client_01)
server.add_client(client_02)

server.training_dist(verbose=False)

# main training
#episode_returns, eps_history = client_01.learn_agent_offline(num_episodes=N_EPISODES, t_update_freq=TARGET_UPDATE_FREQ)

end = time()
print("Total training time: ", end - start)

/Users/jankreischer/Library/Mobile Documents/com~apple~CloudDocs/Master-Thesis/Code
Recognized Behaviours
dict_keys([<Behavior.NORMAL: 'normal'>, <Behavior.RANSOMWARE_POC: 'ransomware_poc'>, <Behavior.ROOTKIT_BDVL: 'bdvl'>, <Behavior.ROOTKIT_BEURK: 'beurk'>, <Behavior.CNC_THETICK: 'the_tick'>, <Behavior.CNC_BACKDOOR_JAKORITAR: 'backdoor_jakoritar'>, <Behavior.CNC_OPT1: 'data_leak_1'>, <Behavior.CNC_OPT2: 'data_leak_2'>])
Recognized Behaviours
dict_keys([<Behavior.NORMAL: 'normal'>, <Behavior.RANSOMWARE_POC: 'ransomware_poc'>, <Behavior.ROOTKIT_BDVL: 'bdvl'>, <Behavior.ROOTKIT_BEURK: 'beurk'>, <Behavior.CNC_THETICK: 'the_tick'>, <Behavior.CNC_BACKDOOR_JAKORITAR: 'backdoor_jakoritar'>, <Behavior.CNC_OPT1: 'data_leak_1'>, <Behavior.CNC_OPT2: 'data_leak_2'>])


TypeError: __init__() got multiple values for argument 'input_dims'

In [30]:
server.plot_learning_curves()

NameError: name 'server' is not defined

In [69]:
experiment_base_dir = "experiments/jans_experiment_01"

EPSILON_START = 1.0
EPSILON_DEC = 1e-4
EPSILON_END = 0.01
NR_ROUNDS = 10


seed_random()
start = time()

full_train_data, full_test_data, _ = DataProvider.get_scaled_train_test_split(scaling_minmax=True, scale_normal_only=True)

subset_1 = (Behavior.NORMAL, Behavior.RANSOMWARE_POC, Behavior.ROOTKIT_BDVL, Behavior.ROOTKIT_BEURK)
subset_2 = (Behavior.NORMAL, Behavior.CNC_THETICK, Behavior.CNC_BACKDOOR_JAKORITAR, Behavior.CNC_OPT1, Behavior.CNC_OPT2)
sub_train_1 = {x: full_train_data[x] for x in subset_1 if x in full_train_data}
sub_train_2 = {x: full_train_data[x] for x in subset_2 if x in full_train_data}

environment_01 = SensorEnvironment(sub_train_1)
environment_02 = SensorEnvironment(sub_train_2)


global_agent = Agent(0, input_dims=environment_02.observation_space_size, n_actions=len(environment_02.actions), buffer_size=BUFFER_SIZE,
                batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

agent_01 = Agent(1, input_dims=environment_01.observation_space_size, n_actions=len(environment_01.actions), buffer_size=BUFFER_SIZE,
              batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

agent_02 = Agent(2, input_dims=environment_02.observation_space_size, n_actions=len(environment_02.actions), buffer_size=BUFFER_SIZE,
                batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

server = Server(global_agent, full_test_data, nr_rounds=NR_ROUNDS, parallelized=True, verbose=True)

client_01 = Client(1, agent_01, environment_01)
client_02 = Client(2, agent_02, environment_02)
# initialize memory replay buffer (randomly)
client_01.init_replay_memory(MIN_REPLAY_SIZE)
client_02.init_replay_memory(MIN_REPLAY_SIZE)

server.add_client(client_01)
server.add_client(client_02)

server.training_dist(verbose=False)

end = time()
print("Total training time: ", end - start)

/Users/jankreischer/Library/Mobile Documents/com~apple~CloudDocs/Master-Thesis/Code
Recognized Behaviours
dict_keys([<Behavior.NORMAL: 'normal'>, <Behavior.RANSOMWARE_POC: 'ransomware_poc'>, <Behavior.ROOTKIT_BDVL: 'bdvl'>, <Behavior.ROOTKIT_BEURK: 'beurk'>])
Recognized Behaviours
dict_keys([<Behavior.NORMAL: 'normal'>, <Behavior.CNC_THETICK: 'the_tick'>, <Behavior.CNC_BACKDOOR_JAKORITAR: 'backdoor_jakoritar'>, <Behavior.CNC_OPT1: 'data_leak_1'>, <Behavior.CNC_OPT2: 'data_leak_2'>])
>>> SERVER TRAINING ROUND 1/10 <<<
> AGENT 1 TRAINING ROUND 1/10 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


> AGENT 2 TRAINING ROUND 1/10 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 2
=== AGGREGATING WEIGHTS ===
SERVER <--- WEIGHTS --- AGENT 1
SERVER <--- WEIGHTS --- AGENT 2
SERVER --- WEIGHTS ---> AGENT 0
>> EVALUATION TRAINING ROUND 1/10 <<
AGENT 1
| Behavior           | Accuracy   | Objective                      |
|--------------------+------------+--------------------------------|
| ransomware_poc     | 98.66%     | ransomware_file_extension_hide |
| bdvl               | 100.00%    | rootkit_sanitizer              |
| beurk              | 100.00%    | rootkit_sanitizer              |
| the_tick           | 0.00%      | cnc_ip_shuffle                 |
| backdoor_jakoritar | 0.00%      | cnc_ip_shuffle                 |
| data_leak_1        | 0.00%      | cnc_ip_shuffle                 |
| data_leak_2        | 0.00%      | cnc_ip_shuffle                 |
AGENT 2
| Behavior           | Accuracy   | Objective                      |
|--------------------+------------+--

In [63]:
experiment_base_dir = "experiments/jans_experiment_01"

EPSILON_START = 1.0
EPSILON_DEC = 1e-4
EPSILON_END = 0.01
NR_ROUNDS = 10


seed_random()
start = time()

full_train_data, full_test_data, _ = DataProvider.get_scaled_train_test_split(scaling_minmax=True, scale_normal_only=True)

subset_1 = (Behavior.NORMAL, Behavior.RANSOMWARE_POC, Behavior.ROOTKIT_BDVL, Behavior.ROOTKIT_BEURK)
subset_2 = (Behavior.NORMAL, Behavior.CNC_THETICK, Behavior.CNC_BACKDOOR_JAKORITAR, Behavior.CNC_OPT1, Behavior.CNC_OPT2)
sub_train_1 = {x: full_train_data[x] for x in subset_1 if x in full_train_data}
sub_train_2 = {x: full_train_data[x] for x in subset_2 if x in full_train_data}

environment_01 = SensorEnvironment(sub_train_1)
environment_02 = SensorEnvironment(sub_train_2)


global_agent = Agent(0, input_dims=environment_02.observation_space_size, n_actions=len(environment_02.actions), buffer_size=BUFFER_SIZE,
                batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

agent_01 = Agent(1, input_dims=environment_01.observation_space_size, n_actions=len(environment_01.actions), buffer_size=BUFFER_SIZE,
              batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

agent_02 = Agent(2, input_dims=environment_02.observation_space_size, n_actions=len(environment_02.actions), buffer_size=BUFFER_SIZE,
                batch_size=BATCH_SIZE, lr=LEARNING_RATE, gamma=GAMMA, epsilon=EPSILON_START, eps_end=EPSILON_END, eps_dec=EPSILON_DEC)

server = Server(global_agent, full_test_data, nr_rounds=NR_ROUNDS, parallelized=False, verbose=True)

client_01 = Client(1, agent_01, environment_01)
client_02 = Client(2, agent_02, environment_02)
# initialize memory replay buffer (randomly)
client_01.init_replay_memory(MIN_REPLAY_SIZE)
client_02.init_replay_memory(MIN_REPLAY_SIZE)

server.add_client(client_01)
server.add_client(client_02)

server.training_dist(verbose=False)

end = time()
print("Total training time: ", end - start)

/Users/jankreischer/Library/Mobile Documents/com~apple~CloudDocs/Master-Thesis/Code
Recognized Behaviours
dict_keys([<Behavior.NORMAL: 'normal'>, <Behavior.RANSOMWARE_POC: 'ransomware_poc'>, <Behavior.ROOTKIT_BDVL: 'bdvl'>, <Behavior.ROOTKIT_BEURK: 'beurk'>])
Recognized Behaviours
dict_keys([<Behavior.NORMAL: 'normal'>, <Behavior.CNC_THETICK: 'the_tick'>, <Behavior.CNC_BACKDOOR_JAKORITAR: 'backdoor_jakoritar'>, <Behavior.CNC_OPT1: 'data_leak_1'>, <Behavior.CNC_OPT2: 'data_leak_2'>])
>>> SERVER TRAINING ROUND 1/10 <<<
> AGENT 1 TRAINING ROUND 1/10 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 1


  return _methods._mean(a, axis=axis, dtype=dtype,


> AGENT 2 TRAINING ROUND 1/10 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 2
=== AGGREGATING WEIGHTS ===
SERVER <--- WEIGHTS --- AGENT 1
SERVER <--- WEIGHTS --- AGENT 2
SERVER --- WEIGHTS ---> AGENT 0
| Behavior           | Accuracy   | Objective                      |
|--------------------+------------+--------------------------------|
| ransomware_poc     | 0.00%      | ransomware_file_extension_hide |
| bdvl               | 0.00%      | rootkit_sanitizer              |
| beurk              | 2.74%      | rootkit_sanitizer              |
| the_tick           | 99.09%     | cnc_ip_shuffle                 |
| backdoor_jakoritar | 97.89%     | cnc_ip_shuffle                 |
| data_leak_1        | 99.20%     | cnc_ip_shuffle                 |
| data_leak_2        | 100.00%    | cnc_ip_shuffle                 |
>>> SERVER TRAINING ROUND 2/10 <<<
> AGENT 1 TRAINING ROUND 2/10 <
SERVER <--- WEIGHTS --- AGENT 0
SERVER --- WEIGHTS ---> AGENT 1
> AGENT 2 TRAINING ROUND 2/1

KeyboardInterrupt: 

In [78]:
a = torch.randn(4, 4)
print(a)
a[torch.argmax(a)]


tensor([[-1.2109,  0.2877,  0.2205,  1.1144],
        [-0.7684,  0.2043,  0.1833,  0.0543],
        [ 2.3913, -0.1870,  0.4581,  1.1125],
        [ 0.9539, -0.6145,  0.2496, -0.4977]])


IndexError: index 8 is out of bounds for dimension 0 with size 4