In [62]:
import os

import mesa
from mesa.datacollection import DataCollector

import pandas as pd
import numpy as np

# Helper functions

In [52]:
def get_distributions(directory_path):
    """Load every ``.parquet`` file in a directory as a named distribution.

    Parameters
    ----------
    directory_path : str
        Directory to scan (sub-directories are not visited). A trailing
        path separator is optional; the file paths are built with
        ``os.path.join``.

    Returns
    -------
    dict
        Maps each file name, minus its trailing ``.parquet`` extension, to
        ``{'data': <object returned by pd.read_parquet>, 'var_type': str}``.
        ``var_type`` is ``'cat'`` when the name contains ``service_encoded``
        or ``protocol_type_encoded`` and ``'num'`` otherwise. Keys are
        inserted in sorted file-name order. An empty dict is returned when
        the directory does not exist.

    Notes
    -----
    Files that cannot be read are reported on stdout and skipped, so a
    partially populated dict is possible.
    """
    extension = '.parquet'
    categorical_markers = ('service_encoded', 'protocol_type_encoded')

    try:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # key order of the result (and anything iterating over it) stable.
        all_files = sorted(os.listdir(directory_path))
    except FileNotFoundError:
        print(f"Directory {directory_path} not found.")
        return {}

    distributions = {}
    for file in all_files:
        if not file.endswith(extension):
            continue

        # Drop only the trailing extension; str.replace would also remove
        # any '.parquet' occurring in the middle of the file name.
        name = file[:-len(extension)]

        try:
            data = pd.read_parquet(os.path.join(directory_path, file))
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"Error reading {file}: {e}")
            continue

        is_categorical = any(marker in name for marker in categorical_markers)
        var_type = 'cat' if is_categorical else 'num'
        distributions[name] = {'data': data, 'var_type': var_type}

    return distributions

# Load distributions

## Normal

In [53]:
# The empty last component makes os.path.join end the path with a separator.
directory_path = os.path.join('distributions', 'normal', '')
normal_agent_dist = get_distributions(directory_path)

## Smurf

In [54]:
# The empty last component makes os.path.join end the path with a separator.
directory_path = os.path.join('distributions', 'smurf', '')
smurf_agent_dist = get_distributions(directory_path)

## Neptune

In [55]:
# The empty last component makes os.path.join end the path with a separator.
directory_path = os.path.join('distributions', 'neptune', '')
neptune_agent_dist = get_distributions(directory_path)

# Model

In [87]:
class NetworkModel(mesa.Model):
    """Model holding a scheduler and a collector for per-agent variables.

    Each call to ``step`` advances the scheduler once and then records the
    tracked attributes of the scheduled agents through the data collector.
    """

    def __init__(self):
        super().__init__()
        self.schedule = mesa.time.RandomActivation(self)

        # Every reporter is keyed by, and reads, the agent attribute of the
        # same name, so the mapping is derived from a single list of names.
        tracked_attributes = (
            "diff_srv_rate",
            "service_encoded",
            "same_srv_rate",
            "protocol_type_encoded",
            "dst_host_same_src_port_rate",
            "srv_count",
            "src_bytes",
            "count",
        )
        reporters = {attribute: attribute for attribute in tracked_attributes}
        self.datacollector = DataCollector(agent_reporters=reporters)

    def step(self):
        """Advance the schedule by one step, then collect the agent data."""
        self.schedule.step()
        self.datacollector.collect(self)


# Agents

In [88]:
class KDDAgent(mesa.Agent):
    """Agent that samples one set of connection variables per step.

    Parameters
    ----------
    unique_id
        Identifier forwarded to ``mesa.Agent``.
    model
        Owning model, forwarded to ``mesa.Agent``.
    distributions : dict
        Maps a variable name to ``{'data': ..., 'var_type': 'num' | 'cat'}``.
        For ``'num'`` entries ``data`` must expose ``'values'`` and
        ``'probs'``; for ``'cat'`` entries ``data`` must, after
        ``reset_index()``, expose a column named after the variable and a
        ``'count'`` column of non-negative weights.
    type_ : str
        Free-form label stored on ``self.type``.
    """

    # Sampled variables that step() mirrors onto same-named attributes.
    _CONN_FEATURES = (
        'diff_srv_rate',
        'service_encoded',
        'same_srv_rate',
        'protocol_type_encoded',
        'dst_host_same_src_port_rate',
        'srv_count',
        'src_bytes',
        'count',
    )

    def __init__(self, unique_id, model, distributions, type_):
        super().__init__(unique_id, model)
        self.distributions = distributions
        self.type = type_

        # Latest sample as a dict, plus one attribute per feature; all stay
        # None until the first call to step().
        self.conn_vars = None
        for feature in self._CONN_FEATURES:
            setattr(self, feature, None)

    def step(self):
        """Draw a fresh sample and copy each feature onto its attribute.

        Raises
        ------
        KeyError
            If ``self.distributions`` lacks one of the expected features.
        """
        self.conn_vars = self.get_conn_variables_from_dist()
        for feature in self._CONN_FEATURES:
            setattr(self, feature, self.conn_vars[feature])

    def get_conn_variables_from_dist(self):
        """Sample one value for every variable in ``self.distributions``.

        Returns
        -------
        dict
            Maps each variable name to its sampled value. Entries whose
            ``var_type`` is neither ``'num'`` nor ``'cat'`` are skipped.

        Raises
        ------
        ValueError
            If a categorical distribution is empty or its weights do not
            add up to a positive number.
        """
        conn_variables = dict()

        for var, dist in self.distributions.items():
            data = dist['data']
            var_type = dist['var_type']

            if var_type == 'num':
                new_sample = np.random.choice(
                    data['values'],
                    size=1,
                    p=data['probs']
                )
                conn_variables[var] = new_sample[0]

            elif var_type == 'cat':
                # reset_index() already returns a new frame, so no copy is
                # needed before reading from it.
                table = data.reset_index()
                weights = table['count'].to_numpy(dtype=float)
                cumulative = np.cumsum(weights)
                total = cumulative[-1] if len(cumulative) else 0.0
                if not total > 0:
                    raise ValueError(
                        f"Categorical distribution for {var!r} has no positive weight."
                    )

                # NOTE(review): SOURCE does not show whether 'count' holds raw
                # counts or probabilities. Scaling the uniform draw by the
                # total is correct for both, and keeps the lookup index in
                # range when probabilities sum to slightly less than 1.
                random_number = np.random.rand()
                choice_index = np.searchsorted(cumulative, random_number * total)

                # Positional lookup of the category at the selected row.
                conn_variables[var] = table[var].iloc[choice_index]

        return conn_variables


In [89]:
model = NetworkModel()

# One agent per traffic class, each sampling from that class's own
# distributions. The neptune agent previously received smurf_agent_dist,
# which made its output a second copy of the smurf traffic.
normal_agent = KDDAgent(unique_id=b'normal.', model=model, distributions=normal_agent_dist, type_='normal')
smurf_agent = KDDAgent(unique_id=b'smurf.', model=model, distributions=smurf_agent_dist, type_='smurf')
neptune_agent = KDDAgent(unique_id=b'neptune.', model=model, distributions=neptune_agent_dist, type_='neptune')

model.schedule.add(normal_agent)
model.schedule.add(smurf_agent)
model.schedule.add(neptune_agent)

In [96]:
iterations = 10000
for i in range(iterations):
    model.step()
    # Print a progress line on every 1000th iteration, starting with the first.
    if not i % 1000:
        print(i, iterations)

# Gather everything the model's data collector recorded for the agents.
agent_data = model.datacollector.get_agent_vars_dataframe()

0 10000
1000 10000
2000 10000
3000 10000
4000 10000
5000 10000
6000 10000
7000 10000
8000 10000
9000 10000


In [98]:
# Write the collected agent data to 'test.csv', a path relative to the
# current working directory.
agent_data.to_csv('test.csv')