In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from dsmanipulator import dsanalyzer as dsa
from dsmanipulator import dsloader as dsl
from dsmanipulator import dscreator as dsc
from dsmanipulator.utils import FileColumnNames

In [3]:
# Bundle of the key column names in the capture file; it is handed to every
# dsc helper below so they know which columns to read.
fcn = FileColumnNames(
    "TimeStamp",
    "Relative Time",
    "srcIP",
    "dstIP",
    "srcPort",
    "dstPort",
)

filename = "../data/scanning-attack.csv"

# Type requested for each CSV column when loading.
# NOTE(review): ports and the ASDU fields are loaded as float rather than int --
# presumably so rows without a value can hold NaN; confirm against dsl.load_data.
column_types = {
    "TimeStamp": "datetime",
    "Relative Time": "float",
    "srcIP": "object",
    "dstIP": "object",
    "srcPort": "float",
    "dstPort": "float",
    "ipLen": "float",
    "len": "float",
    "fmt": "object",
    "uType": "object",
    "asduType": "float",
    "numix": "float",
    "cot": "float",
    "oa": "float",
    "addr": "float",
    "ioa": "object",
}

# Detect the CSV dialect from the file itself, then load the file with it
# (row_limit=None is passed through unchanged from the original call).
dialect = dsl.detect_dialect(filename)
df = dsl.load_data(filename, data_types=column_types, dialect=dialect, row_limit=None)

# Derived columns, added to df in place. Each id mapping is created first and
# then applied, in this order: relative day, station ids, pair id, direction id.
dsc.add_relative_days(df, fcn, inplace=True)

station_ids = dsc.create_station_ids(df, fcn)
dsc.add_station_id(df, fcn, station_ids, inplace=True)

pair_ids = dsc.create_pair_ids(df, fcn)
dsc.add_pair_id(df, fcn, pair_ids, inplace=True)

direction_ids = dsc.create_direction_ids(df, fcn)
# Deliberately left as the bare last expression: its value is what the cell displays.
dsc.add_direction_id(df, fcn, direction_ids, inplace=True)

Unnamed: 0,TimeStamp,Relative Time,srcIP,dstIP,srcPort,dstPort,ipLen,len,fmt,uType,...,numix,cot,oa,addr,ioa,*Relative Day##,*Source station id##,*Destination station id##,*Pair id##,*Direction id##
0,2022-04-25 14:41:44.980,7.988096,192.168.11.111,192.168.11.248,61254.0,2404.0,46.0,4.0,0x00000003,0x00000001,...,,,,,,0,16,167,37,1
1,2022-04-25 14:41:44.980,7.988457,192.168.11.248,192.168.11.111,2404.0,61254.0,46.0,4.0,0x00000003,0x00000002,...,,,,,,0,167,16,37,0
2,2022-04-25 14:41:45.190,8.191193,192.168.11.248,192.168.11.111,2404.0,61254.0,90.0,21.0,0x00000000,,...,1.0,3.0,0.0,65535.0,67,0,167,16,37,0
3,2022-04-25 14:41:45.190,8.191193,192.168.11.248,192.168.11.111,2404.0,61254.0,90.0,25.0,0x00000000,,...,1.0,3.0,0.0,65535.0,2,0,167,16,37,0
4,2022-04-25 14:41:48.470,11.477121,192.168.11.248,192.168.11.111,2404.0,61254.0,67.0,25.0,0x00000000,,...,1.0,3.0,0.0,65535.0,2,0,167,16,37,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58922,2022-04-28 10:36:32.950,244495.958036,192.168.11.248,192.168.11.111,2404.0,61254.0,67.0,25.0,0x00000000,,...,1.0,3.0,0.0,65535.0,2,3,167,16,37,0
58923,2022-04-28 10:36:34.970,244497.977239,192.168.11.111,192.168.11.248,61254.0,2404.0,46.0,4.0,0x00000001,,...,,,,,,3,16,167,37,1
58924,2022-04-28 10:36:35.750,244498.756241,192.168.11.248,192.168.11.111,2404.0,61254.0,67.0,25.0,0x00000000,,...,1.0,3.0,0.0,65535.0,2,3,167,16,37,0
58925,2022-04-28 10:36:43.550,244506.557115,192.168.11.248,192.168.11.111,2404.0,61254.0,67.0,25.0,0x00000000,,...,1.0,3.0,0.0,65535.0,2,3,167,16,37,0


In [7]:
# Distinct destination addresses present in the capture.
# NOTE(review): the long run of consecutive 192.168.11.x addresses in the output
# looks like the scan sweep this dataset is named after -- confirm.
df["dstIP"].unique()

array(['192.168.11.248', '192.168.11.111', '192.168.11.1', '192.168.11.2',
       '192.168.11.3', '192.168.11.4', '192.168.11.5', '192.168.11.6',
       '192.168.11.7', '192.168.11.8', '192.168.11.9', '192.168.11.10',
       '192.168.11.11', '192.168.11.12', '192.168.11.13', '192.168.11.14',
       '192.168.11.15', '192.168.11.16', '192.168.11.17', '192.168.11.18',
       '192.168.11.19', '192.168.11.20', '192.168.11.21', '192.168.11.22',
       '192.168.11.23', '192.168.11.24', '192.168.11.25', '192.168.11.26',
       '192.168.11.27', '192.168.11.28', '192.168.11.29', '192.168.11.30',
       '192.168.11.31', '192.168.11.32', '192.168.11.33', '192.168.11.34',
       '192.168.11.35', '192.168.11.36', '192.168.11.37', '192.168.11.38',
       '192.168.11.39', '192.168.11.40', '192.168.11.41', '192.168.11.42',
       '192.168.11.43', '192.168.11.44', '192.168.11.45', '192.168.11.46',
       '192.168.11.47', '192.168.11.48', '192.168.11.49', '192.168.11.50',
       '192.168.11.51', '192.168