In [79]:
from pathlib import Path

import pandas as pd
import numpy as np
import sys

from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import LabelEncoder

import sklearn.neighbors._base
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base

from missingpy import MissForest

pd.set_option("display.max_columns", None)

from datetime import datetime, timedelta

# Preparing dataset

In [2]:
file_prefix = 'scada'
files = [f for f in Path('./flowtbag/').glob(f'*.csv')
         if f.name.startswith(file_prefix)]
files.append(Path(f'arquivos-trafego-normal/{file_prefix}-normal.csv'))

columns = ["srcip", "srcport", "dstip", "dstport", "proto", "total_fpackets", "total_fvolume", "total_bpackets", "total_bvolume", "min_fpktl", "mean_fpktl", "max_fpktl", "std_fpktl", "min_bpktl", "mean_bpktl", "max_bpktl", "std_bpktl", "min_fiat", "mean_fiat", "max_fiat", "std_fiat", "min_biat", "mean_biat", "max_biat", "std_biat",
           "duration", "min_active", "mean_active", "max_active", "std_active", "min_idle", "mean_idle", "max_idle", "std_idle", "sflow_fpackets", "sflow_fbytes", "sflow_bpackets", "sflow_bbytes", "fpsh_cnt", "bpsh_cnt", "furg_cnt", "burg_cnt", "total_fhlen", "total_bhlen", "timestamp", "dscp", "iptables", "snort_linux", "snort_win", "attack", "tipo"]


In [3]:
diff = lambda x1, x2: [i for i in x1 if i not in x2]

In [4]:
unwanted_columns = ['srcip', 'srcport', 'dstip', 'dstport', 'proto', 'dscp']
diff_columns = diff(columns, unwanted_columns)
df = pd.DataFrame(columns=diff_columns)
rus = RandomUnderSampler()

for f in files:
    _df = pd.read_csv(f, names=columns)
    t = f.stem.split('-', 1)[1]
    _df['tipo'] = t
    _df.drop(unwanted_columns, axis=1, inplace=True)

    # Changing discrete variables to continuous
    _df[['iptables', 'snort_linux', 'snort_win']] = _df[['iptables', 'snort_linux', 'snort_win']].applymap(lambda x: 0 if x == False else 1).astype('int')
    _df['attack'] = _df['attack'].map(lambda x: 0 if x == 'normal' else 1).astype('int')

    # Removing duplicated instances
    _df.drop_duplicates(inplace=True, ignore_index=True)

    # Undersampling
    if 'normal' not in t:
        X, y = rus.fit_resample(_df.drop('attack', axis=1), _df['attack'])
        X['attack'] = y
        _df = X

    df = df.append(_df, ignore_index=True)

    print(f'DONE: {t}')

  df = df.append(_df, ignore_index=True)


DONE: nmap-port-scan


  df = df.append(_df, ignore_index=True)


DONE: smod-read-coils


  df = df.append(_df, ignore_index=True)


DONE: smod-scanner-uid


  df = df.append(_df, ignore_index=True)


DONE: nessus-advanced-scan


  df = df.append(_df, ignore_index=True)


DONE: smod-write-single-register


  df = df.append(_df, ignore_index=True)


DONE: smod-dos-write-single-coils


  df = df.append(_df, ignore_index=True)


DONE: smod-write-single-coils


  df = df.append(_df, ignore_index=True)


DONE: acunetix-sql-injection


  df = df.append(_df, ignore_index=True)


DONE: smod-read-holding-register


  df = df.append(_df, ignore_index=True)


DONE: smod-get-func


  df = df.append(_df, ignore_index=True)


DONE: arachni-code-injection


  df = df.append(_df, ignore_index=True)


DONE: smod-dos-write-single-register


  df = df.append(_df, ignore_index=True)


DONE: acunetix-xss


  df = df.append(_df, ignore_index=True)


DONE: smod-read-input-register
DONE: normal


  df = df.append(_df, ignore_index=True)


In [5]:
df['attack'].value_counts() / len(df)

0    0.588518
1    0.411482
Name: attack, dtype: float64

In [6]:
df.head()

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,attack,tipo
0,24,1377,23,10023,52,57,173,24,40,435,552,199,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,1377,23,10023,1,0,0,0,1256,940,1622409403,0,0,0,0,nmap-port-scan
1,141,7461,203,107010,52,52,173,10,40,527,552,95,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,141,7461,203,107010,1,0,0,0,7340,8140,1622411954,0,0,0,0,nmap-port-scan
2,6,441,5,515,52,73,173,48,40,103,335,129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,441,5,515,1,0,0,0,320,220,1622410311,0,0,0,0,nmap-port-scan
3,50,2729,56,27201,52,54,173,17,40,485,552,149,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,2729,56,27201,1,0,0,0,2608,2260,1622415800,0,0,0,0,nmap-port-scan
4,30,1689,29,12744,52,56,173,22,40,439,552,194,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30,1689,29,12744,1,0,0,0,1568,1180,1622412272,0,0,0,0,nmap-port-scan


In [7]:
# Checking which columns have missing values. Using this approach to avoid truncated output from jupyter.
", ".join(str(i) for i in df.isna().sum())

'0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0'

In [8]:
rus = RandomUnderSampler()
df['attack'] = df['attack'].astype('int')

X, y = rus.fit_resample(df.drop('attack', axis=1), df['attack'])
X['attack'] = y

In [9]:
X['attack'].value_counts() / len(X)

0    0.5
1    0.5
Name: attack, dtype: float64

In [10]:
X.drop_duplicates(inplace=True, ignore_index=True)

In [11]:
X['attack'].value_counts() / len(X)

0    0.5
1    0.5
Name: attack, dtype: float64

In [12]:
len(X)

237436

In [13]:
X.to_csv(f'{file_prefix}-all-attacks.csv', index=False)

# Preparing sysmon within sysstat file

In [14]:
headers = "timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe"
df = pd.read_csv('flowtbag/sysmon-sysstat.csv', names=headers.split(','), sep=',')

df.head()

Unnamed: 0,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe
0,1622327000.0,enp0s3,20,0,1,0,0,0,0,0,1837300,2209352,5460,160044,681744,4297120,5216,1313120,609012,552,1072388,204644,9456,34732,0,all,624,0,203,0,0,0,0,0,0,9174,0,0,60,214371,909.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,
1,1622327000.0,enp0s3,30,0,2,0,0,0,0,0,1837224,2209428,5460,160044,681760,4291612,5209,1312964,609048,100,1072312,204644,9440,34732,0,all,440,0,119,0,0,0,0,0,0,9440,0,0,20,166467,360.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,
2,1622327000.0,enp0s3,10,0,1,0,0,0,0,0,1837440,2209212,5459,160044,681772,4297608,5216,1312576,609008,108,1071916,204644,9440,34732,0,all,317,0,140,0,0,0,5,0,0,9537,0,0,20,157555,170.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,
3,1622327000.0,enp0s3,20,0,1,0,0,0,0,0,1838572,2208080,5457,160044,681788,4305504,5226,1311856,609016,48,1071200,204644,9408,34740,0,all,412,0,165,0,0,0,0,0,0,9423,0,0,20,167323,200.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,
4,1622327000.0,enp0s3,10,0,1,0,0,0,0,0,1835920,2210732,5463,160044,681952,4302312,5222,1314124,609104,12,1073324,204644,9408,35024,0,all,385,0,135,0,0,0,0,0,0,9479,0,0,50,162983,250.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,


In [15]:
df.isna().sum()

timestamp         0
ID1               0
ID2               0
ID3               0
ID4               0
              ...  
cswch         22345
intr          22345
sum           22345
classe_atk    22345
classe        22345
Length: 65, dtype: int64

In [16]:
st_names = "timestamp,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe"
st = pd.read_csv('flowtbag/sysstat.csv', names=st_names.split(','))

sm_names = "timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk,classe"
sm = pd.read_csv('flowtbag/sysmonout.csv', names=sm_names.split(','), sep=';')

In [17]:
round_date = lambda x, _format: x.round(freq=f'{_format}')
to_iso = lambda x: pd.Timestamp.fromtimestamp(x)

In [18]:
st.head(3)

Unnamed: 0,timestamp,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe
0,1622399861,enp0s3,443423,0,140924,0,0,0,0,115,1119272,2927380,7234,166500,998980,4975052,6038,1885928,746856,572,1458716,210100,9520,36144,0,all,2869,0,170,0,0,0,1141,0,0,5820,0,0,50,107373,284129.0,acunetix,ataque
1,1622399871,enp0s3,446806,0,156706,0,0,0,30,128,1118744,2927908,7235,166500,999236,4975052,6038,1886252,746848,744,1458768,210100,9520,36068,0,all,2407,0,59,0,0,0,205,0,0,7330,0,0,20,39980,301597.0,acunetix,ataque
2,1622399881,enp0s3,330251,0,116850,0,0,0,30,96,1114452,2932200,7246,166500,999512,4983748,6049,1890156,746852,336,1462388,210100,9520,36300,0,all,1528,0,39,0,0,0,137,0,0,8295,0,0,50,42154,217705.0,acunetix,ataque


In [19]:
for col in 'txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr'.split(','):
    # Changing , to . then converting str to float
    st[col] = st[col].map(lambda x: float(str(x).replace(',', '.')))

In [20]:
st.head(3)

Unnamed: 0,timestamp,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe
0,1622399861,enp0s3,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,all,28.69,0.0,1.7,0.0,0.0,0.0,11.41,0.0,0.0,58.2,0.0,0.0,0.5,1073.73,284129.0,acunetix,ataque
1,1622399871,enp0s3,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,all,24.07,0.0,0.59,0.0,0.0,0.0,2.05,0.0,0.0,73.3,0.0,0.0,0.2,399.8,301597.0,acunetix,ataque
2,1622399881,enp0s3,3302.51,0.0,1168.5,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,all,15.28,0.0,0.39,0.0,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.5,421.54,217705.0,acunetix,ataque


In [21]:
st.select_dtypes('number').columns.values

array(['timestamp', 'txpck', 'rxkB', 'txkB', 'rxcmp', 'txcmp', 'rxmcst',
       'ifutil', 'kbmemfree', 'kbmemused', 'memused', 'kbbuffers',
       'kbcached', 'kbcommit', 'commit', 'kbactive', 'kbinact', 'kbdirty',
       'kbanonpg', 'kbslab', 'kbkstack', 'kbpgtbl', 'kbvmused', 'CPU',
       'nice', 'sys', 'iowait', 'steal', 'irq', 'soft', 'guest', 'gnice',
       'idle', 'pswpin', 'pswpout', 'proc', 'cswch', 'intr', 'sum'],
      dtype=object)

In [22]:
st.select_dtypes(['object', 'datetime']).columns.values

array(['rxpck', 'usr', 'classe_atk', 'classe'], dtype=object)

In [23]:
le = LabelEncoder()
st['classe_atk_num'] = le.fit_transform(st['classe_atk'])
st['classe_num'] = le.fit_transform(st['classe'])

In [24]:
st.head(3)

Unnamed: 0,timestamp,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe,classe_atk_num,classe_num
0,1622399861,enp0s3,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,all,28.69,0.0,1.7,0.0,0.0,0.0,11.41,0.0,0.0,58.2,0.0,0.0,0.5,1073.73,284129.0,acunetix,ataque,0,0
1,1622399871,enp0s3,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,all,24.07,0.0,0.59,0.0,0.0,0.0,2.05,0.0,0.0,73.3,0.0,0.0,0.2,399.8,301597.0,acunetix,ataque,0,0
2,1622399881,enp0s3,3302.51,0.0,1168.5,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,all,15.28,0.0,0.39,0.0,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.5,421.54,217705.0,acunetix,ataque,0,0


In [25]:
sm.head(3)

Unnamed: 0,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk,classe
0,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque
1,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque
2,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque


In [26]:
sm['classe_atk_num'] = le.fit_transform(sm['classe_atk'])
sm['classe_num'] = le.fit_transform(sm['classe'])

In [27]:
sm.head()

Unnamed: 0,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk,classe,classe_atk_num,classe_num
0,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0
1,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0
2,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0
3,1622401000.0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0
4,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0


In [28]:
sm['timestamp'] = sm['timestamp'].map(to_iso)
st['timestamp'] = st['timestamp'].map(to_iso)

In [29]:
ret = st.join(sm, lsuffix='_sysmon')

In [30]:
ret.isna().sum() / len(ret)

timestamp_sysmon    0.000000
rxpck               0.000000
txpck               0.000000
rxkB                0.000000
txkB                0.000000
                      ...   
ID22                0.309502
classe_atk          0.309502
classe              0.309502
classe_atk_num      0.309502
classe_num          0.309502
Length: 72, dtype: float64

In [31]:
ret

Unnamed: 0,timestamp_sysmon,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_sysmon,classe_sysmon,classe_atk_num_sysmon,classe_num_sysmon,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk,classe,classe_atk_num,classe_num
0,2021-05-30 15:37:41,enp0s3,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,all,28.69,0.0,1.70,0.00,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.50,1073.73,284129.0,acunetix,ataque,0,0,2021-05-30 15:51:44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
1,2021-05-30 15:37:51,enp0s3,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,all,24.07,0.0,0.59,0.00,0.0,0.0,2.05,0.0,0.0,73.30,0.0,0.0,0.20,399.80,301597.0,acunetix,ataque,0,0,2021-05-30 15:51:56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
2,2021-05-30 15:38:01,enp0s3,3302.51,0.0,1168.50,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,all,15.28,0.0,0.39,0.00,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.50,421.54,217705.0,acunetix,ataque,0,0,2021-05-30 15:52:10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
3,2021-05-30 15:38:11,enp0s3,3978.12,0.0,1354.35,0.0,0.0,0.0,0.0,1.11,1117724.0,2928928.0,72.38,166500.0,999736.0,4983748.0,60.49,1887732.0,746848.0,532.0,1459704.0,210092.0,9504.0,36072.0,0.0,all,19.99,0.0,0.43,0.00,0.0,0.0,0.43,0.0,0.0,79.15,0.0,0.0,0.20,312.09,288991.0,acunetix,ataque,0,0,2021-05-30 15:52:20,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
4,2021-05-30 15:38:21,enp0s3,4272.78,0.0,1455.68,0.0,0.0,0.0,0.0,1.19,1117000.0,2929652.0,72.40,166500.0,1000024.0,4983748.0,60.49,1887976.0,746856.0,720.0,1459744.0,210092.0,9504.0,36072.0,0.0,all,23.42,0.0,0.20,0.00,0.0,0.0,6.32,0.0,0.0,70.06,0.0,0.0,0.20,353.64,282892.0,acunetix,ataque,0,0,2021-05-30 15:52:34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13213,2021-06-09 17:18:01,enp0s3,0.10,0.0,0.01,0.0,0.0,0.0,0.0,0.00,1082984.0,2963668.0,73.24,174284.0,1251584.0,4414052.0,53.58,1593176.0,1045972.0,24.0,1204060.0,239068.0,9472.0,35164.0,0.0,all,4.04,0.0,1.52,0.00,0.0,0.0,0.05,0.0,0.0,94.39,0.0,0.0,0.50,1794.08,250.0,smod,ataque,4,0,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,
13214,2021-06-09 17:18:11,enp0s3,0.70,0.0,0.15,0.0,0.0,0.0,0.5,0.00,1086924.0,2959728.0,73.14,174284.0,1251440.0,4416332.0,53.60,1589748.0,1045856.0,84.0,1200684.0,239068.0,9472.0,35000.0,0.0,all,3.86,0.0,1.20,0.00,0.0,0.0,0.00,0.0,0.0,94.93,0.0,0.0,0.20,1764.14,190.0,smod,ataque,4,0,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,
13215,2021-06-09 17:18:21,enp0s3,0.20,0.0,0.07,0.0,0.0,0.0,0.4,0.00,1087032.0,2959620.0,73.14,174284.0,1251452.0,4406420.0,53.48,1589792.0,1045852.0,100.0,1200716.0,239068.0,9472.0,35000.0,0.0,all,5.77,0.0,1.66,0.00,0.0,0.0,0.00,0.0,0.0,92.58,0.0,0.0,0.50,2027.62,1417.0,smod,ataque,4,0,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,
13216,2021-06-09 17:18:31,enp0s3,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,1087172.0,2959480.0,73.13,174284.0,1251468.0,4409196.0,53.52,1589840.0,1045860.0,108.0,1200772.0,239068.0,9472.0,35004.0,0.0,all,7.76,0.0,2.22,0.00,0.0,0.0,0.05,0.0,0.0,89.96,0.0,0.0,0.30,2589.62,1437.0,smod,ataque,4,0,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,


In [32]:
ret.timestamp.head()

0   2021-05-30 15:51:44
1   2021-05-30 15:51:56
2   2021-05-30 15:52:10
3   2021-05-30 15:52:20
4   2021-05-30 15:52:34
Name: timestamp, dtype: datetime64[ns]

In [33]:
mf = MissForest()

In [34]:
# Selecting only numerical variables
cat_vars = [*st.select_dtypes(['object', 'datetime']).columns.values, 'timestamp_sysmon', 'classe', 'classe_atk', 'classe_atk_sysmon', 'classe_sysmon']

mf_data = mf.fit_transform(ret.drop(cat_vars, axis=1))

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Iteration: 0


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Iteration: 1


  warn(


In [35]:
new_df = pd.DataFrame(mf_data, columns=ret.drop(cat_vars, axis=1).columns.values)

In [36]:
new_df.head(3)

Unnamed: 0,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num
0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.7,0.0,0.0,0.0,11.41,0.0,0.0,58.2,0.0,0.0,0.5,1073.73,284129.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,24.07,0.0,0.59,0.0,0.0,0.0,2.05,0.0,0.0,73.3,0.0,0.0,0.2,399.8,301597.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3302.51,0.0,1168.5,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,15.28,0.0,0.39,0.0,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.5,421.54,217705.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
cols = ['classe_atk_num_sysmon', 'classe_num_sysmon', 'classe_atk_num', 'classe_num']
new_df[cols] = new_df[cols].applymap(int)

In [38]:
# Storing a label encoder for each column
# Sysmon
class_atk_sysmon = LabelEncoder().fit(sm['classe_atk'])
class_sysmon = LabelEncoder().fit(sm['classe'])

# Sysstat
class_atk_sysstat = LabelEncoder().fit(st['classe_atk'])
class_sysstat = LabelEncoder().fit(st['classe'])

In [39]:
class_atk_sysmon.inverse_transform(new_df['classe_atk_num_sysmon'])

array(['acunetix', 'acunetix', 'acunetix', ..., 'smod', 'smod', 'smod'],
      dtype=object)

In [40]:
new_df['timestamp'] = st['timestamp']

new_df['class_atk_sysmon'] = class_atk_sysmon.inverse_transform(new_df['classe_atk_num_sysmon'])
new_df['class_sysmon'] = class_sysmon.inverse_transform(new_df['classe_num_sysmon'])
new_df['class_atk_sysstat'] = class_atk_sysstat.inverse_transform(new_df['classe_atk_num'])
new_df['class_sysstat'] = class_sysstat.inverse_transform(new_df['classe_num'])

In [41]:
new_df

Unnamed: 0,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num,timestamp,class_atk_sysmon,class_sysmon,class_atk_sysstat,class_sysstat
0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.70,0.00,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.50,1073.73,284129.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:37:41,acunetix,ataque,acunetix,ataque
1,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,24.07,0.0,0.59,0.00,0.0,0.0,2.05,0.0,0.0,73.30,0.0,0.0,0.20,399.80,301597.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:37:51,acunetix,ataque,acunetix,ataque
2,3302.51,0.0,1168.50,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,15.28,0.0,0.39,0.00,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.50,421.54,217705.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:38:01,acunetix,ataque,acunetix,ataque
3,3978.12,0.0,1354.35,0.0,0.0,0.0,0.0,1.11,1117724.0,2928928.0,72.38,166500.0,999736.0,4983748.0,60.49,1887732.0,746848.0,532.0,1459704.0,210092.0,9504.0,36072.0,0.0,19.99,0.0,0.43,0.00,0.0,0.0,0.43,0.0,0.0,79.15,0.0,0.0,0.20,312.09,288991.0,0,0,4.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:38:11,acunetix,ataque,acunetix,ataque
4,4272.78,0.0,1455.68,0.0,0.0,0.0,0.0,1.19,1117000.0,2929652.0,72.40,166500.0,1000024.0,4983748.0,60.49,1887976.0,746856.0,720.0,1459744.0,210092.0,9504.0,36072.0,0.0,23.42,0.0,0.20,0.00,0.0,0.0,6.32,0.0,0.0,70.06,0.0,0.0,0.20,353.64,282892.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:38:21,acunetix,ataque,acunetix,ataque
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13213,0.10,0.0,0.01,0.0,0.0,0.0,0.0,0.00,1082984.0,2963668.0,73.24,174284.0,1251584.0,4414052.0,53.58,1593176.0,1045972.0,24.0,1204060.0,239068.0,9472.0,35164.0,0.0,4.04,0.0,1.52,0.00,0.0,0.0,0.05,0.0,0.0,94.39,0.0,0.0,0.50,1794.08,250.0,4,0,0.28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,2021-06-09 17:18:01,smod,ataque,smod,ataque
13214,0.70,0.0,0.15,0.0,0.0,0.0,0.5,0.00,1086924.0,2959728.0,73.14,174284.0,1251440.0,4416332.0,53.60,1589748.0,1045856.0,84.0,1200684.0,239068.0,9472.0,35000.0,0.0,3.86,0.0,1.20,0.00,0.0,0.0,0.00,0.0,0.0,94.93,0.0,0.0,0.20,1764.14,190.0,4,0,0.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,2021-06-09 17:18:11,smod,ataque,smod,ataque
13215,0.20,0.0,0.07,0.0,0.0,0.0,0.4,0.00,1087032.0,2959620.0,73.14,174284.0,1251452.0,4406420.0,53.48,1589792.0,1045852.0,100.0,1200716.0,239068.0,9472.0,35000.0,0.0,5.77,0.0,1.66,0.00,0.0,0.0,0.00,0.0,0.0,92.58,0.0,0.0,0.50,2027.62,1417.0,4,0,0.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,2021-06-09 17:18:21,smod,ataque,smod,ataque
13216,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,1087172.0,2959480.0,73.13,174284.0,1251468.0,4409196.0,53.52,1589840.0,1045860.0,108.0,1200772.0,239068.0,9472.0,35004.0,0.0,7.76,0.0,2.22,0.00,0.0,0.0,0.05,0.0,0.0,89.96,0.0,0.0,0.30,2589.62,1437.0,4,0,0.28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,2021-06-09 17:18:31,smod,ataque,smod,ataque


In [42]:
new_df.isna().sum() / len(new_df)

txpck                0.0
rxkB                 0.0
txkB                 0.0
rxcmp                0.0
txcmp                0.0
                    ... 
timestamp            0.0
class_atk_sysmon     0.0
class_sysmon         0.0
class_atk_sysstat    0.0
class_sysstat        0.0
Length: 69, dtype: float64

In [43]:
len(new_df), len(df)

(13218, 22345)

In [44]:
len(new_df.columns)

69

In [45]:
new_df.to_csv('sysmon-sysstat.csv', index=False)

## Checking if the number of columns matches with the data fields in sysstat-sysmon.csv file

In [46]:
a = '1622326821.0, enp0s3, "0,20", "0,00", "0,01", "0,00", "0,00", "0,00", "0,00", "0,00", 1837300, 2209352, "54,60", 160044, 681744, 4297120, "52,16", 1313120, 609012, 552, 1072388, 204644, 9456, 34732, 0, all, "6,24", "0,00", "2,03", "0,00", "0,00", "0,00", "0,00", "0,00", "0,00", "91,74", "0,00", "0,00", "0,60", "2143,71", 909.0, acunetix, ataque'

In [47]:
aa = [float(i.replace(',', '.').replace('\"', '')) if ',' in i or '.' in i else i for i in a.split(', ')]

In [48]:
len(aa)

43

In [49]:
len(headers.split(','))

65

# Merging ```sysmon-sysstat.csv``` to ```scada-all-attacks.csv```

In [50]:
df = pd.read_csv('sysmon-sysstat.csv')
scada = pd.read_csv('scada-all-attacks.csv')

In [51]:
df.head()

Unnamed: 0,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num,timestamp,class_atk_sysmon,class_sysmon,class_atk_sysstat,class_sysstat
0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.7,0.0,0.0,0.0,11.41,0.0,0.0,58.2,0.0,0.0,0.5,1073.73,284129.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:37:41,acunetix,ataque,acunetix,ataque
1,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,24.07,0.0,0.59,0.0,0.0,0.0,2.05,0.0,0.0,73.3,0.0,0.0,0.2,399.8,301597.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:37:51,acunetix,ataque,acunetix,ataque
2,3302.51,0.0,1168.5,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,15.28,0.0,0.39,0.0,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.5,421.54,217705.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:38:01,acunetix,ataque,acunetix,ataque
3,3978.12,0.0,1354.35,0.0,0.0,0.0,0.0,1.11,1117724.0,2928928.0,72.38,166500.0,999736.0,4983748.0,60.49,1887732.0,746848.0,532.0,1459704.0,210092.0,9504.0,36072.0,0.0,19.99,0.0,0.43,0.0,0.0,0.0,0.43,0.0,0.0,79.15,0.0,0.0,0.2,312.09,288991.0,0,0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:38:11,acunetix,ataque,acunetix,ataque
4,4272.78,0.0,1455.68,0.0,0.0,0.0,0.0,1.19,1117000.0,2929652.0,72.4,166500.0,1000024.0,4983748.0,60.49,1887976.0,746856.0,720.0,1459744.0,210092.0,9504.0,36072.0,0.0,23.42,0.0,0.2,0.0,0.0,0.0,6.32,0.0,0.0,70.06,0.0,0.0,0.2,353.64,282892.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-05-30 15:38:21,acunetix,ataque,acunetix,ataque


In [52]:
scada.head()

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,tipo,attack
0,90,4809,127,67413,52,53,173,12,40,530,552,95,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,90,4809,127,67413,1,0,0,0,4688,5100,1623257004,0,0,0,smod-write-single-coils,0
1,11,488,4,243,40,44,52,6,40,60,111,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,243,2,1,0,0,464,172,1625841167,1,0,1,normal,0
2,17,800,8,412,40,47,52,6,40,51,72,14,0,0,1,0,1,0,1,0,3,3,3,3,0,0,0,0,0,17,800,8,412,6,3,0,0,728,332,1623203108,1,0,1,smod-read-holding-register,0
3,9,408,4,243,40,45,52,6,40,60,111,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,408,4,243,2,1,0,0,384,172,1623245925,1,0,1,smod-read-input-register,0
4,59,3197,79,40953,52,54,173,15,40,518,552,118,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,59,3197,79,40953,1,0,0,0,3076,3180,1622406926,0,0,0,acunetix-sql-injection,0


In [102]:
def round_date(x, round_to=10):
    """
        round_to: value in seconds to round
        x: value to round
    """
    
    try:
        ret = datetime.fromisoformat(str(x))
    except ValueError:
        ret = datetime.fromtimestamp(x)

    ret = (ret.timestamp() + round_to / 2) // round_to * round_to
    return int(ret)

In [105]:
scada['timestamp'] = scada['timestamp'].map(round_date)
df['timestamp'] = df['timestamp'].map(round_date)

In [106]:
df = df.sort_values('timestamp')
scada = scada.sort_values('timestamp')

In [107]:
scada.head()

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,tipo,attack
84806,9,408,4,181,40,45,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,408,4,181,2,1,0,0,384,172,1622383980,1,0,0,acunetix-xss,0
94912,11,488,4,181,40,44,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622383990,1,0,0,acunetix-xss,0
101970,11,488,4,181,40,44,52,6,40,45,52,6,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622383990,1,0,0,acunetix-xss,0
19025,11,488,4,181,40,44,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622384020,1,0,1,acunetix-xss,0
98769,11,488,4,181,40,44,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622384060,1,1,1,acunetix-xss,0


In [108]:
ret = pd.merge_asof(scada, df, on='timestamp', direction='nearest')
ret

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,tipo,attack,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num,class_atk_sysmon,class_sysmon,class_atk_sysstat,class_sysstat
0,9,408,4,181,40,45,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,408,4,181,2,1,0,0,384,172,1622383980,1,0,0,acunetix-xss,0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.70,0.0,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.5,1073.73,284129.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
1,11,488,4,181,40,44,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622383990,1,0,0,acunetix-xss,0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.70,0.0,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.5,1073.73,284129.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
2,11,488,4,181,40,44,52,6,40,45,52,6,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622383990,1,0,0,acunetix-xss,0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.70,0.0,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.5,1073.73,284129.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
3,11,488,4,181,40,44,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622384020,1,0,1,acunetix-xss,0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.70,0.0,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.5,1073.73,284129.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
4,11,488,4,181,40,44,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622384060,1,1,1,acunetix-xss,0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.70,0.0,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.5,1073.73,284129.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237431,6,441,5,515,52,73,173,48,40,103,335,129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,441,5,515,1,0,0,0,320,220,1625898470,0,0,0,normal,0,0.90,0.0,0.06,0.0,0.0,0.0,0.1,0.00,784464.0,3262188.0,80.61,175872.0,1471704.0,4451304.0,54.03,1764636.0,1165072.0,176.0,1272776.0,249508.0,9504.0,35252.0,0.0,6.19,0.0,1.69,0.0,0.0,0.0,0.05,0.0,0.0,92.07,0.0,0.0,0.3,1924.25,450.0,4,0,0.72,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
237432,65,3509,86,43411,52,53,173,15,40,504,552,138,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,65,3509,86,43411,1,0,0,0,3388,3460,1625898470,0,0,0,normal,0,0.90,0.0,0.06,0.0,0.0,0.0,0.1,0.00,784464.0,3262188.0,80.61,175872.0,1471704.0,4451304.0,54.03,1764636.0,1165072.0,176.0,1272776.0,249508.0,9504.0,35252.0,0.0,6.19,0.0,1.69,0.0,0.0,0.0,0.05,0.0,0.0,92.07,0.0,0.0,0.3,1924.25,450.0,4,0,0.72,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
237433,68,3665,91,43611,52,53,173,14,40,479,552,149,1,0,1,0,1,0,1,0,1,1,1,1,0,0,0,0,0,68,3665,91,43611,1,0,0,0,3544,3660,1625898470,0,0,0,normal,0,0.90,0.0,0.06,0.0,0.0,0.0,0.1,0.00,784464.0,3262188.0,80.61,175872.0,1471704.0,4451304.0,54.03,1764636.0,1165072.0,176.0,1272776.0,249508.0,9504.0,35252.0,0.0,6.19,0.0,1.69,0.0,0.0,0.0,0.05,0.0,0.0,92.07,0.0,0.0,0.3,1924.25,450.0,4,0,0.72,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
237434,8,392,4,314,40,49,52,5,40,78,111,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,392,4,314,2,2,0,0,368,172,1625898470,1,0,1,normal,0,0.90,0.0,0.06,0.0,0.0,0.0,0.1,0.00,784464.0,3262188.0,80.61,175872.0,1471704.0,4451304.0,54.03,1764636.0,1165072.0,176.0,1272776.0,249508.0,9504.0,35252.0,0.0,6.19,0.0,1.69,0.0,0.0,0.0,0.05,0.0,0.0,92.07,0.0,0.0,0.3,1924.25,450.0,4,0,0.72,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque


In [109]:
# Checking which columns have missing values. Using this approach to avoid truncated output from jupyter.
", ".join(str(i) for i in ret.isna().sum())

'0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0'

## Normalizing data

In [110]:
from sklearn.preprocessing import PowerTransformer

In [111]:
classes_columns = ['classe_atk_num_sysmon', 'classe_num_sysmon', 'timestamp', 'classe_atk_num', 'classe_num', 'classe_atk_num', 'classe_num', 'classe_atk_num_sysmon', 'classe_num_sysmon', 'attack', 'tipo', 'iptables', 'snort_linux', 'snort_win']
features = ret.drop(classes_columns, axis=1).select_dtypes('number').columns

In [112]:
pt = PowerTransformer()
ret[features] = pt.fit_transform(np.array(ret[features]))

  loglike = -n_samples / 2 * np.log(x_trans.var())
  x = um.multiply(x, x, out=x)


In [113]:
ret

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,tipo,attack,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num,class_atk_sysmon,class_sysmon,class_atk_sysstat,class_sysstat
0,-0.530123,-0.971702,-1.164850,-1.348202,-1.397826,-1.753410,-1.584743,-0.909202,-4.024558e-16,-1.457073,-1.483963,-1.334085,-0.219143,-0.16733,-0.703855,-0.254047,-0.283989,-0.219691,-0.686129,-0.306024,-0.702207,-0.702207,-0.702207,-0.702207,0.0,0.0,0.0,0.0,0.0,-0.530123,-0.971702,-1.164850,-1.348202,1.348145,0.369791,0.0,0.0,-0.626448,-1.310379,1622383980,1,0,0,acunetix-xss,0,2.533171,-0.014942,2.896157,-0.014942,0.0,0.0,-0.473030,2.906511,0.561869,-0.542701,-0.543068,-1.646158,-1.764710,0.0,5.689893e-16,0.0,-2.118143,2.092521,0.0,0.0,0.0,0.0,0.0,1.445127,-0.008946,-0.760418,-0.862584,0.0,0.0,2.626099,0.0,0.0,-1.699551,-0.425507,0.0,0.008067,-1.589251,0.0,0,0,-1.106568,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,0,0,acunetix,ataque,acunetix,ataque
1,-0.342637,-0.790963,-1.164850,-1.348202,-1.397826,-1.943878,-1.584743,-0.909202,-4.024558e-16,-1.457073,-1.483963,-1.334085,-0.219143,-0.16733,-0.703855,-0.254047,-0.283989,-0.219691,-0.686129,-0.306024,-0.702207,-0.702207,-0.702207,-0.702207,0.0,0.0,0.0,0.0,0.0,-0.342637,-0.790963,-1.164850,-1.348202,1.348145,0.369791,0.0,0.0,-0.443472,-1.310379,1622383990,1,0,0,acunetix-xss,0,2.533171,-0.014942,2.896157,-0.014942,0.0,0.0,-0.473030,2.906511,0.561869,-0.542701,-0.543068,-1.646158,-1.764710,0.0,5.689893e-16,0.0,-2.118143,2.092521,0.0,0.0,0.0,0.0,0.0,1.445127,-0.008946,-0.760418,-0.862584,0.0,0.0,2.626099,0.0,0.0,-1.699551,-0.425507,0.0,0.008067,-1.589251,0.0,0,0,-1.106568,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,0,0,acunetix,ataque,acunetix,ataque
2,-0.342637,-0.790963,-1.164850,-1.348202,-1.397826,-1.943878,-1.584743,-0.909202,-4.024558e-16,-1.457073,-1.483963,-1.334085,-0.219143,-0.16733,1.280793,-0.254047,-0.283989,-0.219691,1.333336,-0.306024,1.225713,1.225713,1.225713,1.225713,0.0,0.0,0.0,0.0,0.0,-0.342637,-0.790963,-1.164850,-1.348202,1.348145,0.369791,0.0,0.0,-0.443472,-1.310379,1622383990,1,0,0,acunetix-xss,0,2.533171,-0.014942,2.896157,-0.014942,0.0,0.0,-0.473030,2.906511,0.561869,-0.542701,-0.543068,-1.646158,-1.764710,0.0,5.689893e-16,0.0,-2.118143,2.092521,0.0,0.0,0.0,0.0,0.0,1.445127,-0.008946,-0.760418,-0.862584,0.0,0.0,2.626099,0.0,0.0,-1.699551,-0.425507,0.0,0.008067,-1.589251,0.0,0,0,-1.106568,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,0,0,acunetix,ataque,acunetix,ataque
3,-0.342637,-0.790963,-1.164850,-1.348202,-1.397826,-1.943878,-1.584743,-0.909202,-4.024558e-16,-1.457073,-1.483963,-1.334085,-0.219143,-0.16733,-0.703855,-0.254047,-0.283989,-0.219691,-0.686129,-0.306024,-0.702207,-0.702207,-0.702207,-0.702207,0.0,0.0,0.0,0.0,0.0,-0.342637,-0.790963,-1.164850,-1.348202,1.348145,0.369791,0.0,0.0,-0.443472,-1.310379,1622384020,1,0,1,acunetix-xss,0,2.533171,-0.014942,2.896157,-0.014942,0.0,0.0,-0.473030,2.906511,0.561869,-0.542701,-0.543068,-1.646158,-1.764710,0.0,5.689893e-16,0.0,-2.118143,2.092521,0.0,0.0,0.0,0.0,0.0,1.445127,-0.008946,-0.760418,-0.862584,0.0,0.0,2.626099,0.0,0.0,-1.699551,-0.425507,0.0,0.008067,-1.589251,0.0,0,0,-1.106568,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,0,0,acunetix,ataque,acunetix,ataque
4,-0.342637,-0.790963,-1.164850,-1.348202,-1.397826,-1.943878,-1.584743,-0.909202,-4.024558e-16,-1.457073,-1.483963,-1.334085,-0.219143,-0.16733,-0.703855,-0.254047,-0.283989,-0.219691,-0.686129,-0.306024,-0.702207,-0.702207,-0.702207,-0.702207,0.0,0.0,0.0,0.0,0.0,-0.342637,-0.790963,-1.164850,-1.348202,1.348145,0.369791,0.0,0.0,-0.443472,-1.310379,1622384060,1,1,1,acunetix-xss,0,2.533171,-0.014942,2.896157,-0.014942,0.0,0.0,-0.473030,2.906511,0.561869,-0.542701,-0.543068,-1.646158,-1.764710,0.0,5.689893e-16,0.0,-2.118143,2.092521,0.0,0.0,0.0,0.0,0.0,1.445127,-0.008946,-0.760418,-0.862584,0.0,0.0,2.626099,0.0,0.0,-1.699551,-0.425507,0.0,0.008067,-1.589251,0.0,0,0,-1.106568,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,0,0,acunetix,ataque,acunetix,ataque
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237431,-0.934844,-0.892186,-1.006784,-1.000105,0.637125,0.681691,0.069759,0.818906,-4.024558e-16,-0.978289,-0.257901,0.257847,-0.219143,-0.16733,-0.703855,-0.254047,-0.283989,-0.219691,-0.686129,-0.306024,-0.702207,-0.702207,-0.702207,-0.702207,0.0,0.0,0.0,0.0,0.0,-0.934844,-0.892186,-1.006784,-1.000105,-0.357127,-0.958049,0.0,0.0,-0.813424,-1.088592,1625898470,0,0,0,normal,0,0.652522,-0.014942,-0.171461,-0.014942,0.0,0.0,2.109353,-0.344075,-1.109261,1.104430,1.103651,1.434916,0.915072,0.0,1.387779e-17,0.0,0.696390,0.358995,0.0,0.0,0.0,0.0,0.0,-0.843524,-0.008946,-0.773880,-0.862584,0.0,0.0,-0.458248,0.0,0.0,0.917079,-0.425507,0.0,-0.908882,-0.283827,0.0,4,0,0.436445,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,4,0,smod,ataque,smod,ataque
237432,0.921354,0.740886,0.784995,0.663538,0.637125,-0.621898,0.069759,-0.023733,-4.024558e-16,0.479748,0.222180,0.303067,-0.219143,-0.16733,-0.703855,-0.254047,-0.283989,-0.219691,-0.686129,-0.306024,-0.702207,-0.702207,-0.702207,-0.702207,0.0,0.0,0.0,0.0,0.0,0.921354,0.740886,0.784995,0.663538,-0.357127,-0.958049,0.0,0.0,0.937980,0.789446,1625898470,0,0,0,normal,0,0.652522,-0.014942,-0.171461,-0.014942,0.0,0.0,2.109353,-0.344075,-1.109261,1.104430,1.103651,1.434916,0.915072,0.0,1.387779e-17,0.0,0.696390,0.358995,0.0,0.0,0.0,0.0,0.0,-0.843524,-0.008946,-0.773880,-0.862584,0.0,0.0,-0.458248,0.0,0.0,0.917079,-0.425507,0.0,-0.908882,-0.283827,0.0,4,0,0.436445,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,4,0,smod,ataque,smod,ataque
237433,0.944961,0.766753,0.813672,0.665429,0.637125,-0.621898,0.069759,-0.083644,-4.024558e-16,0.418496,0.222180,0.355070,4.563229,-0.16733,1.280793,-0.254047,3.520791,-0.219691,1.333336,-0.306024,1.225713,1.225713,1.225713,1.225713,0.0,0.0,0.0,0.0,0.0,0.944961,0.766753,0.813672,0.665429,-0.357127,-0.958049,0.0,0.0,0.960208,0.818185,1625898470,0,0,0,normal,0,0.652522,-0.014942,-0.171461,-0.014942,0.0,0.0,2.109353,-0.344075,-1.109261,1.104430,1.103651,1.434916,0.915072,0.0,1.387779e-17,0.0,0.696390,0.358995,0.0,0.0,0.0,0.0,0.0,-0.843524,-0.008946,-0.773880,-0.862584,0.0,0.0,-0.458248,0.0,0.0,0.917079,-0.425507,0.0,-0.908882,-0.283827,0.0,4,0,0.436445,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,4,0,smod,ataque,smod,ataque
237434,-0.644268,-1.013212,-1.164850,-1.166866,-1.397826,-1.112863,-1.584743,-1.106017,-4.024558e-16,-1.155997,-1.074875,-0.497509,-0.219143,-0.16733,-0.703855,-0.254047,-0.283989,-0.219691,-0.686129,-0.306024,-0.702207,-0.702207,-0.702207,-0.702207,0.0,0.0,0.0,0.0,0.0,-0.644268,-1.013212,-1.164850,-1.166866,1.348145,0.859631,0.0,0.0,-0.669133,-1.310379,1625898470,1,0,1,normal,0,0.652522,-0.014942,-0.171461,-0.014942,0.0,0.0,2.109353,-0.344075,-1.109261,1.104430,1.103651,1.434916,0.915072,0.0,1.387779e-17,0.0,0.696390,0.358995,0.0,0.0,0.0,0.0,0.0,-0.843524,-0.008946,-0.773880,-0.862584,0.0,0.0,-0.458248,0.0,0.0,0.917079,-0.425507,0.0,-0.908882,-0.283827,0.0,4,0,0.436445,0.0,-0.014657,0.0,-0.018359,0.0,0.0,0.0,0.0,0.0,-0.014367,0.0,-0.009178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016031,4,0,smod,ataque,smod,ataque


In [114]:
ret.to_csv('scada-sysmon-sysstat.csv', index=False)