In [2]:
from pathlib import Path

import pandas as pd
import numpy as np
import sys

from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import LabelEncoder

import sklearn.neighbors._base
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base

from missingpy import MissForest

pd.set_option("display.max_columns", None)

from datetime import datetime

# Preparing dataset

In [190]:
file_prefix = 'scada'
files = [f for f in Path('./flowtbag/').glob(f'*.csv')
         if f.name.startswith(file_prefix)]
files.append(Path(f'arquivos-trafego-normal/{file_prefix}-normal.csv'))

columns = ["srcip", "srcport", "dstip", "dstport", "proto", "total_fpackets", "total_fvolume", "total_bpackets", "total_bvolume", "min_fpktl", "mean_fpktl", "max_fpktl", "std_fpktl", "min_bpktl", "mean_bpktl", "max_bpktl", "std_bpktl", "min_fiat", "mean_fiat", "max_fiat", "std_fiat", "min_biat", "mean_biat", "max_biat", "std_biat",
           "duration", "min_active", "mean_active", "max_active", "std_active", "min_idle", "mean_idle", "max_idle", "std_idle", "sflow_fpackets", "sflow_fbytes", "sflow_bpackets", "sflow_bbytes", "fpsh_cnt", "bpsh_cnt", "furg_cnt", "burg_cnt", "total_fhlen", "total_bhlen", "timestamp", "dscp", "iptables", "snort_linux", "snort_win", "attack", "tipo"]


In [191]:
diff = lambda x1, x2: [i for i in x1 if i not in x2]

In [227]:
unwanted_columns = ['srcip', 'srcport', 'dstip', 'dstport', 'proto', 'dscp']
diff_columns = diff(columns, unwanted_columns)
df = pd.DataFrame(columns=diff_columns)
rus = RandomUnderSampler()

for f in files:
    _df = pd.read_csv(f, names=columns)
    t = f.stem.split('-', 1)[1]
    _df['tipo'] = t
    _df.drop(unwanted_columns, axis=1, inplace=True)

    # Changing discrete variables to continuous
    _df[['iptables', 'snort_linux', 'snort_win']] = _df[['iptables', 'snort_linux', 'snort_win']].applymap(lambda x: 0 if x == False else 1).astype('int')
    _df['attack'] = _df['attack'].map(lambda x: 0 if x == 'normal' else 1).astype('int')

    # Removing duplicated instances
    _df.drop_duplicates(inplace=True, ignore_index=True)

    # Undersampling
    if 'normal' not in t:
        X, y = rus.fit_resample(_df.drop('attack', axis=1), _df['attack'])
        X['attack'] = y
        _df = X

    df = df.append(_df, ignore_index=True)

    print(f'DONE: {t}')

DONE: acunetix-sql-injection
DONE: smod-dos-write-single-coils
DONE: smod-write-single-coils
DONE: smod-write-single-register
DONE: smod-read-input-register
DONE: acunetix-xss
DONE: arachni-code-injection
DONE: smod-get-func
DONE: smod-scanner-uid
DONE: smod-read-coils
DONE: nessus-advanced-scan
DONE: smod-read-holding-register
DONE: smod-dos-write-single-register
DONE: nmap-port-scan
DONE: normal


In [228]:
df['attack'].value_counts() / len(df)

0    0.588518
1    0.411482
Name: attack, dtype: float64

In [194]:
df.head()

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,attack,tipo
0,22,1273,21,8673,52,57,173,25,40,413,552,204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,1273,21,8673,1,0,0,0,1152,860,1622406675,0,0,0,0,acunetix-sql-injection
1,292,15313,452,247139,52,52,173,7,40,546,552,50,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,292,15313,452,247139,1,0,0,0,15192,18100,1622405164,0,0,0,0,acunetix-sql-injection
2,161,8501,290,157947,52,52,173,9,40,544,552,59,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,161,8501,290,157947,1,0,0,0,8380,11620,1622406718,0,0,0,0,acunetix-sql-injection
3,175,9229,288,155970,52,52,173,9,40,541,552,69,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,175,9229,288,155970,1,0,0,0,9108,11540,1622399744,0,0,0,0,acunetix-sql-injection
4,68,3665,93,48968,52,53,173,14,40,526,552,108,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,68,3665,93,48968,1,0,0,0,3544,3740,1622400743,0,0,0,0,acunetix-sql-injection


In [196]:
# Checking which columns have missing values. Using this approach to avoid truncated output from jupyter.
", ".join(str(i) for i in df.isna().sum())

'0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0'

In [7]:
rus = RandomUnderSampler()
df['attack'] = df['attack'].astype('int')

X, y = rus.fit_resample(df.drop('attack', axis=1), df['attack'])
X['attack'] = y

In [8]:
X['attack'].value_counts() / len(X)

0    0.5
1    0.5
Name: attack, dtype: float64

In [9]:
X.drop_duplicates(inplace=True, ignore_index=True)

In [10]:
X['attack'].value_counts() / len(X)

0    0.5
1    0.5
Name: attack, dtype: float64

In [11]:
len(X)

237436

In [12]:
X.to_csv(f'{file_prefix}-all-attacks.csv', index=False)

# Preparing sysmon within sysstat file

In [158]:
headers = "timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe"
df = pd.read_csv('flowtbag/sysmon-sysstat.csv', names=headers.split(','), sep=',')

df.head()

Unnamed: 0,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe
0,1622327000.0,enp0s3,20,0,1,0,0,0,0,0,1837300,2209352,5460,160044,681744,4297120,5216,1313120,609012,552,1072388,204644,9456,34732,0,all,624,0,203,0,0,0,0,0,0,9174,0,0,60,214371,909.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,
1,1622327000.0,enp0s3,30,0,2,0,0,0,0,0,1837224,2209428,5460,160044,681760,4291612,5209,1312964,609048,100,1072312,204644,9440,34732,0,all,440,0,119,0,0,0,0,0,0,9440,0,0,20,166467,360.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,
2,1622327000.0,enp0s3,10,0,1,0,0,0,0,0,1837440,2209212,5459,160044,681772,4297608,5216,1312576,609008,108,1071916,204644,9440,34732,0,all,317,0,140,0,0,0,5,0,0,9537,0,0,20,157555,170.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,
3,1622327000.0,enp0s3,20,0,1,0,0,0,0,0,1838572,2208080,5457,160044,681788,4305504,5226,1311856,609016,48,1071200,204644,9408,34740,0,all,412,0,165,0,0,0,0,0,0,9423,0,0,20,167323,200.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,
4,1622327000.0,enp0s3,10,0,1,0,0,0,0,0,1835920,2210732,5463,160044,681952,4302312,5222,1314124,609104,12,1073324,204644,9408,35024,0,all,385,0,135,0,0,0,0,0,0,9479,0,0,50,162983,250.0,acunetix,ataque,,,,,,,,,,,,,,,,,,,,,,


In [159]:
df.isna().sum()

timestamp         0
ID1               0
ID2               0
ID3               0
ID4               0
              ...  
cswch         22345
intr          22345
sum           22345
classe_atk    22345
classe        22345
Length: 65, dtype: int64

In [160]:
st_names = "timestamp,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe"
st = pd.read_csv('flowtbag/sysstat.csv', names=st_names.split(','))

sm_names = "timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk,classe"
sm = pd.read_csv('flowtbag/sysmonout.csv', names=sm_names.split(','), sep=';')

In [161]:
round_date = lambda x, _format: x.round(freq=f'{_format}')
to_iso = lambda x: pd.Timestamp.fromtimestamp(x)

In [162]:
st.head(3)

Unnamed: 0,timestamp,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe
0,1622399861,enp0s3,443423,0,140924,0,0,0,0,115,1119272,2927380,7234,166500,998980,4975052,6038,1885928,746856,572,1458716,210100,9520,36144,0,all,2869,0,170,0,0,0,1141,0,0,5820,0,0,50,107373,284129.0,acunetix,ataque
1,1622399871,enp0s3,446806,0,156706,0,0,0,30,128,1118744,2927908,7235,166500,999236,4975052,6038,1886252,746848,744,1458768,210100,9520,36068,0,all,2407,0,59,0,0,0,205,0,0,7330,0,0,20,39980,301597.0,acunetix,ataque
2,1622399881,enp0s3,330251,0,116850,0,0,0,30,96,1114452,2932200,7246,166500,999512,4983748,6049,1890156,746852,336,1462388,210100,9520,36300,0,all,1528,0,39,0,0,0,137,0,0,8295,0,0,50,42154,217705.0,acunetix,ataque


In [163]:
for col in 'txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr'.split(','):
    # Changing , to . then converting str to float
    st[col] = st[col].map(lambda x: float(str(x).replace(',', '.')))

In [164]:
st.head(3)

Unnamed: 0,timestamp,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe
0,1622399861,enp0s3,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,all,28.69,0.0,1.7,0.0,0.0,0.0,11.41,0.0,0.0,58.2,0.0,0.0,0.5,1073.73,284129.0,acunetix,ataque
1,1622399871,enp0s3,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,all,24.07,0.0,0.59,0.0,0.0,0.0,2.05,0.0,0.0,73.3,0.0,0.0,0.2,399.8,301597.0,acunetix,ataque
2,1622399881,enp0s3,3302.51,0.0,1168.5,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,all,15.28,0.0,0.39,0.0,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.5,421.54,217705.0,acunetix,ataque


In [165]:
st.select_dtypes('number').columns.values

array(['timestamp', 'txpck', 'rxkB', 'txkB', 'rxcmp', 'txcmp', 'rxmcst',
       'ifutil', 'kbmemfree', 'kbmemused', 'memused', 'kbbuffers',
       'kbcached', 'kbcommit', 'commit', 'kbactive', 'kbinact', 'kbdirty',
       'kbanonpg', 'kbslab', 'kbkstack', 'kbpgtbl', 'kbvmused', 'CPU',
       'nice', 'sys', 'iowait', 'steal', 'irq', 'soft', 'guest', 'gnice',
       'idle', 'pswpin', 'pswpout', 'proc', 'cswch', 'intr', 'sum'],
      dtype=object)

In [166]:
st.select_dtypes(['object', 'datetime']).columns.values

array(['rxpck', 'usr', 'classe_atk', 'classe'], dtype=object)

In [167]:
le = LabelEncoder()
st['classe_atk_num'] = le.fit_transform(st['classe_atk'])
st['classe_num'] = le.fit_transform(st['classe'])

In [168]:
st.head(3)

Unnamed: 0,timestamp,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk,classe,classe_atk_num,classe_num
0,1622399861,enp0s3,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,all,28.69,0.0,1.7,0.0,0.0,0.0,11.41,0.0,0.0,58.2,0.0,0.0,0.5,1073.73,284129.0,acunetix,ataque,0,0
1,1622399871,enp0s3,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,all,24.07,0.0,0.59,0.0,0.0,0.0,2.05,0.0,0.0,73.3,0.0,0.0,0.2,399.8,301597.0,acunetix,ataque,0,0
2,1622399881,enp0s3,3302.51,0.0,1168.5,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,all,15.28,0.0,0.39,0.0,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.5,421.54,217705.0,acunetix,ataque,0,0


In [169]:
sm.head(3)

Unnamed: 0,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk,classe
0,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque
1,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque
2,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque


In [170]:
sm['classe_atk_num'] = le.fit_transform(sm['classe_atk'])
sm['classe_num'] = le.fit_transform(sm['classe'])

In [171]:
sm.head()

Unnamed: 0,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk,classe,classe_atk_num,classe_num
0,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0
1,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0
2,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0
3,1622401000.0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0
4,1622401000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,acunetix,ataque,0,0


In [157]:
sm['timestamp'] = sm['timestamp'].map(to_iso)
st['timestamp'] = st['timestamp'].map(to_iso)

In [172]:
ret = st.join(sm, lsuffix='_sysmon')

In [173]:
ret.isna().sum() / len(ret)

timestamp_sysmon    0.000000
rxpck               0.000000
txpck               0.000000
rxkB                0.000000
txkB                0.000000
                      ...   
ID22                0.309502
classe_atk          0.309502
classe              0.309502
classe_atk_num      0.309502
classe_num          0.309502
Length: 72, dtype: float64

In [174]:
ret

Unnamed: 0,timestamp_sysmon,rxpck,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,usr,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_sysmon,classe_sysmon,classe_atk_num_sysmon,classe_num_sysmon,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk,classe,classe_atk_num,classe_num
0,1622399861,enp0s3,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,all,28.69,0.0,1.70,0.00,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.50,1073.73,284129.0,acunetix,ataque,0,0,1.622401e+09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
1,1622399871,enp0s3,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,all,24.07,0.0,0.59,0.00,0.0,0.0,2.05,0.0,0.0,73.30,0.0,0.0,0.20,399.80,301597.0,acunetix,ataque,0,0,1.622401e+09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
2,1622399881,enp0s3,3302.51,0.0,1168.50,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,all,15.28,0.0,0.39,0.00,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.50,421.54,217705.0,acunetix,ataque,0,0,1.622401e+09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
3,1622399891,enp0s3,3978.12,0.0,1354.35,0.0,0.0,0.0,0.0,1.11,1117724.0,2928928.0,72.38,166500.0,999736.0,4983748.0,60.49,1887732.0,746848.0,532.0,1459704.0,210092.0,9504.0,36072.0,0.0,all,19.99,0.0,0.43,0.00,0.0,0.0,0.43,0.0,0.0,79.15,0.0,0.0,0.20,312.09,288991.0,acunetix,ataque,0,0,1.622401e+09,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
4,1622399901,enp0s3,4272.78,0.0,1455.68,0.0,0.0,0.0,0.0,1.19,1117000.0,2929652.0,72.40,166500.0,1000024.0,4983748.0,60.49,1887976.0,746856.0,720.0,1459744.0,210092.0,9504.0,36072.0,0.0,all,23.42,0.0,0.20,0.00,0.0,0.0,6.32,0.0,0.0,70.06,0.0,0.0,0.20,353.64,282892.0,acunetix,ataque,0,0,1.622401e+09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,acunetix,ataque,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13213,1623269881,enp0s3,0.10,0.0,0.01,0.0,0.0,0.0,0.0,0.00,1082984.0,2963668.0,73.24,174284.0,1251584.0,4414052.0,53.58,1593176.0,1045972.0,24.0,1204060.0,239068.0,9472.0,35164.0,0.0,all,4.04,0.0,1.52,0.00,0.0,0.0,0.05,0.0,0.0,94.39,0.0,0.0,0.50,1794.08,250.0,smod,ataque,4,0,,,,,,,,,,,,,,,,,,,,,,,,,,,
13214,1623269891,enp0s3,0.70,0.0,0.15,0.0,0.0,0.0,0.5,0.00,1086924.0,2959728.0,73.14,174284.0,1251440.0,4416332.0,53.60,1589748.0,1045856.0,84.0,1200684.0,239068.0,9472.0,35000.0,0.0,all,3.86,0.0,1.20,0.00,0.0,0.0,0.00,0.0,0.0,94.93,0.0,0.0,0.20,1764.14,190.0,smod,ataque,4,0,,,,,,,,,,,,,,,,,,,,,,,,,,,
13215,1623269901,enp0s3,0.20,0.0,0.07,0.0,0.0,0.0,0.4,0.00,1087032.0,2959620.0,73.14,174284.0,1251452.0,4406420.0,53.48,1589792.0,1045852.0,100.0,1200716.0,239068.0,9472.0,35000.0,0.0,all,5.77,0.0,1.66,0.00,0.0,0.0,0.00,0.0,0.0,92.58,0.0,0.0,0.50,2027.62,1417.0,smod,ataque,4,0,,,,,,,,,,,,,,,,,,,,,,,,,,,
13216,1623269911,enp0s3,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,1087172.0,2959480.0,73.13,174284.0,1251468.0,4409196.0,53.52,1589840.0,1045860.0,108.0,1200772.0,239068.0,9472.0,35004.0,0.0,all,7.76,0.0,2.22,0.00,0.0,0.0,0.05,0.0,0.0,89.96,0.0,0.0,0.30,2589.62,1437.0,smod,ataque,4,0,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [175]:
ret.timestamp.head()

0    1.622401e+09
1    1.622401e+09
2    1.622401e+09
3    1.622401e+09
4    1.622401e+09
Name: timestamp, dtype: float64

In [176]:
mf = MissForest()

In [177]:
# Selecting only numerical variables
cat_vars = [*st.select_dtypes(['object', 'datetime']).columns.values, 'timestamp_sysmon', 'classe', 'classe_atk', 'classe_atk_sysmon', 'classe_sysmon']

mf_data = mf.fit_transform(ret.drop(cat_vars, axis=1))

Iteration: 0
Iteration: 1
Iteration: 2


In [178]:
new_df = pd.DataFrame(mf_data, columns=ret.drop(cat_vars, axis=1).columns.values)

In [179]:
new_df.head(3)

Unnamed: 0,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num
0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.7,0.0,0.0,0.0,11.41,0.0,0.0,58.2,0.0,0.0,0.5,1073.73,284129.0,0.0,0.0,1622401000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,24.07,0.0,0.59,0.0,0.0,0.0,2.05,0.0,0.0,73.3,0.0,0.0,0.2,399.8,301597.0,0.0,0.0,1622401000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3302.51,0.0,1168.5,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,15.28,0.0,0.39,0.0,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.5,421.54,217705.0,0.0,0.0,1622401000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [180]:
cols = ['classe_atk_num_sysmon', 'classe_num_sysmon', 'classe_atk_num', 'classe_num']
new_df[cols] = new_df[cols].applymap(int)

In [181]:
# Storing a label encoder for each column
# Sysmon
class_atk_sysmon = LabelEncoder().fit(sm['classe_atk'])
class_sysmon = LabelEncoder().fit(sm['classe'])

# Sysstat
class_atk_sysstat = LabelEncoder().fit(st['classe_atk'])
class_sysstat = LabelEncoder().fit(st['classe'])

In [182]:
class_atk_sysmon.inverse_transform(new_df['classe_atk_num_sysmon'])

array(['acunetix', 'acunetix', 'acunetix', ..., 'smod', 'smod', 'smod'],
      dtype=object)

In [183]:
new_df['timestamp'] = st['timestamp']

new_df['class_atk_sysmon'] = class_atk_sysmon.inverse_transform(new_df['classe_atk_num_sysmon'])
new_df['class_sysmon'] = class_sysmon.inverse_transform(new_df['classe_num_sysmon'])
new_df['class_atk_sysstat'] = class_atk_sysstat.inverse_transform(new_df['classe_atk_num'])
new_df['class_sysstat'] = class_sysstat.inverse_transform(new_df['classe_num'])

In [184]:
new_df

Unnamed: 0,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num,class_atk_sysmon,class_sysmon,class_atk_sysstat,class_sysstat
0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.70,0.00,0.0,0.0,11.41,0.0,0.0,58.20,0.0,0.0,0.50,1073.73,284129.0,0,0,1622399861,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
1,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,24.07,0.0,0.59,0.00,0.0,0.0,2.05,0.0,0.0,73.30,0.0,0.0,0.20,399.80,301597.0,0,0,1622399871,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
2,3302.51,0.0,1168.50,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,15.28,0.0,0.39,0.00,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.50,421.54,217705.0,0,0,1622399881,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
3,3978.12,0.0,1354.35,0.0,0.0,0.0,0.0,1.11,1117724.0,2928928.0,72.38,166500.0,999736.0,4983748.0,60.49,1887732.0,746848.0,532.0,1459704.0,210092.0,9504.0,36072.0,0.0,19.99,0.0,0.43,0.00,0.0,0.0,0.43,0.0,0.0,79.15,0.0,0.0,0.20,312.09,288991.0,0,0,1622399891,4.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
4,4272.78,0.0,1455.68,0.0,0.0,0.0,0.0,1.19,1117000.0,2929652.0,72.40,166500.0,1000024.0,4983748.0,60.49,1887976.0,746856.0,720.0,1459744.0,210092.0,9504.0,36072.0,0.0,23.42,0.0,0.20,0.00,0.0,0.0,6.32,0.0,0.0,70.06,0.0,0.0,0.20,353.64,282892.0,0,0,1622399901,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13213,0.10,0.0,0.01,0.0,0.0,0.0,0.0,0.00,1082984.0,2963668.0,73.24,174284.0,1251584.0,4414052.0,53.58,1593176.0,1045972.0,24.0,1204060.0,239068.0,9472.0,35164.0,0.0,4.04,0.0,1.52,0.00,0.0,0.0,0.05,0.0,0.0,94.39,0.0,0.0,0.50,1794.08,250.0,4,0,1623269881,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
13214,0.70,0.0,0.15,0.0,0.0,0.0,0.5,0.00,1086924.0,2959728.0,73.14,174284.0,1251440.0,4416332.0,53.60,1589748.0,1045856.0,84.0,1200684.0,239068.0,9472.0,35000.0,0.0,3.86,0.0,1.20,0.00,0.0,0.0,0.00,0.0,0.0,94.93,0.0,0.0,0.20,1764.14,190.0,4,0,1623269891,0.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
13215,0.20,0.0,0.07,0.0,0.0,0.0,0.4,0.00,1087032.0,2959620.0,73.14,174284.0,1251452.0,4406420.0,53.48,1589792.0,1045852.0,100.0,1200716.0,239068.0,9472.0,35000.0,0.0,5.77,0.0,1.66,0.00,0.0,0.0,0.00,0.0,0.0,92.58,0.0,0.0,0.50,2027.62,1417.0,4,0,1623269901,0.32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
13216,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,1087172.0,2959480.0,73.13,174284.0,1251468.0,4409196.0,53.52,1589840.0,1045860.0,108.0,1200772.0,239068.0,9472.0,35004.0,0.0,7.76,0.0,2.22,0.00,0.0,0.0,0.05,0.0,0.0,89.96,0.0,0.0,0.30,2589.62,1437.0,4,0,1623269911,0.28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque


In [185]:
new_df.isna().sum() / len(new_df)

txpck                0.0
rxkB                 0.0
txkB                 0.0
rxcmp                0.0
txcmp                0.0
                    ... 
classe_num           0.0
class_atk_sysmon     0.0
class_sysmon         0.0
class_atk_sysstat    0.0
class_sysstat        0.0
Length: 69, dtype: float64

In [186]:
len(new_df), len(df)

(13218, 22345)

In [187]:
len(new_df.columns)

69

In [188]:
new_df.to_csv('sysmon-sysstat.csv', index=False)

## Checking if the number of columns matches with the data fields in sysstat-sysmon.csv file

In [5]:
a = '1622326821.0, enp0s3, "0,20", "0,00", "0,01", "0,00", "0,00", "0,00", "0,00", "0,00", 1837300, 2209352, "54,60", 160044, 681744, 4297120, "52,16", 1313120, 609012, 552, 1072388, 204644, 9456, 34732, 0, all, "6,24", "0,00", "2,03", "0,00", "0,00", "0,00", "0,00", "0,00", "0,00", "91,74", "0,00", "0,00", "0,60", "2143,71", 909.0, acunetix, ataque'

In [12]:
aa = [float(i.replace(',', '.').replace('\"', '')) if ',' in i or '.' in i else i for i in a.split(', ')]

In [14]:
len(aa)

43

In [15]:
len(headers.split(','))

65

# Merging ```sysmon-sysstat.csv``` to ```scada-all-attacks.csv```

In [3]:
df = pd.read_csv('sysmon-sysstat.csv')
scada = pd.read_csv('scada-all-attacks.csv')

In [4]:
df.head()

Unnamed: 0,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,timestamp,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num,class_atk_sysmon,class_sysmon,class_atk_sysstat,class_sysstat
0,4434.23,0.0,1409.24,0.0,0.0,0.0,0.0,1.15,1119272.0,2927380.0,72.34,166500.0,998980.0,4975052.0,60.38,1885928.0,746856.0,572.0,1458716.0,210100.0,9520.0,36144.0,0.0,28.69,0.0,1.7,0.0,0.0,0.0,11.41,0.0,0.0,58.2,0.0,0.0,0.5,1073.73,284129.0,0,0,1622399861,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
1,4468.06,0.0,1567.06,0.0,0.0,0.0,0.3,1.28,1118744.0,2927908.0,72.35,166500.0,999236.0,4975052.0,60.38,1886252.0,746848.0,744.0,1458768.0,210100.0,9520.0,36068.0,0.0,24.07,0.0,0.59,0.0,0.0,0.0,2.05,0.0,0.0,73.3,0.0,0.0,0.2,399.8,301597.0,0,0,1622399871,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
2,3302.51,0.0,1168.5,0.0,0.0,0.0,0.3,0.96,1114452.0,2932200.0,72.46,166500.0,999512.0,4983748.0,60.49,1890156.0,746852.0,336.0,1462388.0,210100.0,9520.0,36300.0,0.0,15.28,0.0,0.39,0.0,0.0,0.0,1.37,0.0,0.0,82.95,0.0,0.0,0.5,421.54,217705.0,0,0,1622399881,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
3,3978.12,0.0,1354.35,0.0,0.0,0.0,0.0,1.11,1117724.0,2928928.0,72.38,166500.0,999736.0,4983748.0,60.49,1887732.0,746848.0,532.0,1459704.0,210092.0,9504.0,36072.0,0.0,19.99,0.0,0.43,0.0,0.0,0.0,0.43,0.0,0.0,79.15,0.0,0.0,0.2,312.09,288991.0,0,0,1622399891,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
4,4272.78,0.0,1455.68,0.0,0.0,0.0,0.0,1.19,1117000.0,2929652.0,72.4,166500.0,1000024.0,4983748.0,60.49,1887976.0,746856.0,720.0,1459744.0,210092.0,9504.0,36072.0,0.0,23.42,0.0,0.2,0.0,0.0,0.0,6.32,0.0,0.0,70.06,0.0,0.0,0.2,353.64,282892.0,0,0,1622399901,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque


In [5]:
scada.head()

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,tipo,attack
0,192,10113,331,178335,52,52,173,8,40,538,552,73,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,10113,331,178335,1,0,0,0,9992,13260,1625841825,0,0,0,normal,0
1,102,5433,164,87919,52,53,173,11,40,536,552,83,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,102,5433,164,87919,1,0,0,0,5312,6580,1622401287,0,0,0,acunetix-sql-injection,0
2,70,3767,93,45835,52,53,171,14,40,492,552,133,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,3767,93,45835,1,0,0,0,3648,3740,1623183971,0,0,0,smod-read-coils,0
3,320,16769,479,257012,52,52,173,6,40,536,552,77,0,0,1,0,0,0,1,0,3,3,3,3,0,0,0,0,0,320,16769,479,257012,1,0,0,0,16648,19180,1625843936,0,0,0,normal,0
4,175,9229,263,142991,52,52,173,9,40,543,552,62,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,175,9229,263,142991,1,0,0,0,9108,10540,1623267949,0,0,0,smod-write-single-register,0


In [6]:
def round_date(x, freq='10S'):
    ret = pd.Timestamp(x, unit='s').round(freq=freq)
    ret = datetime.fromisoformat(str(ret)).timestamp()
    return int(ret)

In [8]:
scada['timestamp'] = scada['timestamp'].map(round_date)
df['timestamp'] = df['timestamp'].map(round_date)

In [9]:
df = df.sort_values('timestamp')
scada = scada.sort_values('timestamp')

In [10]:
scada.head()

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,tipo,attack
79713,9,408,4,181,40,45,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,408,4,181,2,1,0,0,384,172,1622340790,1,0,0,acunetix-xss,0
41579,9,408,4,181,40,45,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,408,4,181,2,1,0,0,384,172,1622340800,1,0,0,acunetix-xss,0
58098,11,488,4,181,40,44,52,6,40,45,52,6,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622340800,1,0,0,acunetix-xss,0
111111,11,488,4,181,40,44,52,6,40,45,52,6,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622340830,1,0,1,acunetix-xss,0
118612,11,488,4,181,40,44,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622340840,1,1,1,acunetix-xss,0


In [12]:
ret = pd.merge_asof(scada, df, on='timestamp', direction='nearest')
ret

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,tipo,attack,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num,class_atk_sysmon,class_sysmon,class_atk_sysstat,class_sysstat
0,9,408,4,181,40,45,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,408,4,181,2,1,0,0,384,172,1622340790,1,0,0,acunetix-xss,0,4.99,0.0,0.30,0.0,0.0,0.0,0.0,0.0,1342988.0,2703664.0,66.81,160572.0,716500.0,5015788.0,60.88,1807584.0,606912.0,100.0,1529436.0,204816.0,9472.0,35860.0,0.0,41.66,0.0,1.66,0.0,0.0,0.0,0.00,0.0,0.0,56.68,0.0,0.0,0.4,1650.65,648.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
1,9,408,4,181,40,45,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,408,4,181,2,1,0,0,384,172,1622340800,1,0,0,acunetix-xss,0,17.86,0.0,1.08,0.0,0.0,0.0,0.0,0.0,1343420.0,2703232.0,66.80,160572.0,716756.0,5009468.0,60.80,1807804.0,606892.0,116.0,1529488.0,204816.0,9504.0,35916.0,0.0,6.50,0.0,1.99,0.0,0.0,0.0,0.05,0.0,0.0,91.45,0.0,0.0,0.6,1917.37,2934.0,0,0,4.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
2,11,488,4,181,40,44,52,6,40,45,52,6,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622340800,1,0,0,acunetix-xss,0,17.86,0.0,1.08,0.0,0.0,0.0,0.0,0.0,1343420.0,2703232.0,66.80,160572.0,716756.0,5009468.0,60.80,1807804.0,606892.0,116.0,1529488.0,204816.0,9504.0,35916.0,0.0,6.50,0.0,1.99,0.0,0.0,0.0,0.05,0.0,0.0,91.45,0.0,0.0,0.6,1917.37,2934.0,0,0,4.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
3,11,488,4,181,40,44,52,6,40,45,52,6,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622340830,1,0,1,acunetix-xss,0,21.26,0.0,1.29,0.0,0.0,0.0,0.0,0.0,1342940.0,2703712.0,66.81,160572.0,716684.0,5016004.0,60.88,1808220.0,606932.0,64.0,1529812.0,204816.0,9472.0,35860.0,0.0,4.12,0.0,1.15,0.0,0.0,0.0,0.05,0.0,0.0,94.68,0.0,0.0,0.2,1677.64,2256.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
4,11,488,4,181,40,44,52,6,40,45,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,488,4,181,2,1,0,0,464,172,1622340840,1,1,1,acunetix-xss,0,20.69,0.0,1.26,0.0,0.0,0.0,0.0,0.0,1338544.0,2708108.0,66.92,160572.0,717004.0,5019580.0,60.93,1811304.0,607108.0,100.0,1532860.0,204816.0,9472.0,36084.0,0.0,4.65,0.0,1.53,0.0,0.0,0.0,0.16,0.0,0.0,93.66,0.0,0.0,0.5,1762.36,2118.0,0,0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,acunetix,ataque,acunetix,ataque
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237431,141,7461,186,94122,52,52,173,10,40,506,552,119,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,141,7461,186,94122,1,0,0,0,7340,7460,1625855270,0,0,0,normal,0,0.90,0.0,0.06,0.0,0.0,0.0,0.1,0.0,784464.0,3262188.0,80.61,175872.0,1471704.0,4451304.0,54.03,1764636.0,1165072.0,176.0,1272776.0,249508.0,9504.0,35252.0,0.0,6.19,0.0,1.69,0.0,0.0,0.0,0.05,0.0,0.0,92.07,0.0,0.0,0.3,1924.25,450.0,4,0,0.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
237432,213,11205,295,160492,52,52,173,8,40,544,552,61,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,213,11205,295,160492,1,0,0,0,11084,11820,1625855270,0,0,0,normal,0,0.90,0.0,0.06,0.0,0.0,0.0,0.1,0.0,784464.0,3262188.0,80.61,175872.0,1471704.0,4451304.0,54.03,1764636.0,1165072.0,176.0,1272776.0,249508.0,9504.0,35252.0,0.0,6.19,0.0,1.69,0.0,0.0,0.0,0.05,0.0,0.0,92.07,0.0,0.0,0.3,1924.25,450.0,4,0,0.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
237433,194,10217,304,165112,52,52,173,8,40,543,552,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194,10217,304,165112,1,0,0,0,10096,12180,1625855270,0,0,0,normal,0,0.90,0.0,0.06,0.0,0.0,0.0,0.1,0.0,784464.0,3262188.0,80.61,175872.0,1471704.0,4451304.0,54.03,1764636.0,1165072.0,176.0,1272776.0,249508.0,9504.0,35252.0,0.0,6.19,0.0,1.69,0.0,0.0,0.0,0.05,0.0,0.0,92.07,0.0,0.0,0.3,1924.25,450.0,4,0,0.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque
237434,125,6629,175,95596,52,53,173,10,40,546,552,53,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,125,6629,175,95596,1,0,0,0,6508,7020,1625855270,0,0,0,normal,0,0.90,0.0,0.06,0.0,0.0,0.0,0.1,0.0,784464.0,3262188.0,80.61,175872.0,1471704.0,4451304.0,54.03,1764636.0,1165072.0,176.0,1272776.0,249508.0,9504.0,35252.0,0.0,6.19,0.0,1.69,0.0,0.0,0.0,0.05,0.0,0.0,92.07,0.0,0.0,0.3,1924.25,450.0,4,0,0.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,smod,ataque,smod,ataque


In [13]:
# Checking which columns have missing values. Using this approach to avoid truncated output from jupyter.
", ".join(str(i) for i in ret.isna().sum())

'0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0'

## Normalizing data

In [14]:
from sklearn.preprocessing import PowerTransformer

In [16]:
classes_columns = ['classe_atk_num_sysmon', 'classe_num_sysmon', 'timestamp', 'classe_atk_num', 'classe_num', 'classe_atk_num', 'classe_num', 'classe_atk_num_sysmon', 'classe_num_sysmon', 'attack', 'tipo', 'iptables', 'snort_linux', 'snort_win']
features = ret.drop(classes_columns, axis=1).select_dtypes('number').columns

In [20]:
pt = PowerTransformer()
ret[features] = pt.fit_transform(np.array(ret[features]))

  loglike = -n_samples / 2 * np.log(x_trans.var())
  x = um.multiply(x, x, out=x)
  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


In [21]:
ret

Unnamed: 0,total_fpackets,total_fvolume,total_bpackets,total_bvolume,min_fpktl,mean_fpktl,max_fpktl,std_fpktl,min_bpktl,mean_bpktl,max_bpktl,std_bpktl,min_fiat,mean_fiat,max_fiat,std_fiat,min_biat,mean_biat,max_biat,std_biat,duration,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,sflow_fpackets,sflow_fbytes,sflow_bpackets,sflow_bbytes,fpsh_cnt,bpsh_cnt,furg_cnt,burg_cnt,total_fhlen,total_bhlen,timestamp,iptables,snort_linux,snort_win,tipo,attack,txpck,rxkB,txkB,rxcmp,txcmp,rxmcst,ifutil,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbanonpg,kbslab,kbkstack,kbpgtbl,kbvmused,CPU,nice,sys,iowait,steal,irq,soft,guest,gnice,idle,pswpin,pswpout,proc,cswch,intr,sum,classe_atk_num_sysmon,classe_num_sysmon,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15,ID16,ID17,ID18,ID19,ID20,ID21,ID22,classe_atk_num,classe_num,class_atk_sysmon,class_sysmon,class_atk_sysstat,class_sysstat
0,-0.531611,-0.973446,-1.166558,-1.349828,-1.399612,-1.757253,-1.586249,-0.911927,-0.516389,-1.458594,-1.485467,-1.335894,-0.219459,-0.167237,-0.703507,-0.254131,-0.283902,-0.21968,-0.685628,-0.305777,-0.701878,-0.701878,-0.701878,-0.701878,0.0,0.0,0.0,0.0,0.0,-0.531611,-0.973446,-1.166558,-1.349828,1.357180,0.370378,0.0,0.0,-0.627956,-1.312199,1622340790,1,0,0,acunetix-xss,0,-2.186866,-0.050626,-2.164174,-0.050626,0.0,0.0,-0.717871,-1.785545,2.329460,-2.551293,-2.551404,-2.076097,-2.269273,-0.221940,-0.332239,0.0,-1.980832,-1.341629,-0.600526,-1.773918,0.0,-0.961148,0.0,2.286249,-0.030175,-0.422248,-0.32594,0.0,0.0,-1.671728,0.0,0.0,-0.414336,-0.054765,-0.038968,0.780369,-0.247613,-2.151735,0,0,-0.864225,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,-0.439332,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,0,0,acunetix,ataque,acunetix,ataque
1,-0.531611,-0.973446,-1.166558,-1.349828,-1.399612,-1.757253,-1.586249,-0.911927,-0.516389,-1.458594,-1.485467,-1.335894,-0.219459,-0.167237,-0.703507,-0.254131,-0.283902,-0.21968,-0.685628,-0.305777,-0.701878,-0.701878,-0.701878,-0.701878,0.0,0.0,0.0,0.0,0.0,-0.531611,-0.973446,-1.166558,-1.349828,1.357180,0.370378,0.0,0.0,-0.627956,-1.312199,1622340800,1,0,0,acunetix-xss,0,-2.156880,-0.050626,-2.157928,-0.050626,0.0,0.0,-0.717871,-1.785545,2.330793,-2.552917,-2.552925,-2.076097,-2.268218,-0.278495,-0.395807,0.0,-1.980925,-1.183894,-0.600248,-1.773918,0.0,-0.862623,0.0,-1.781447,-0.030175,-0.095933,-0.32594,0.0,0.0,-1.661034,0.0,0.0,1.662439,-0.054765,-0.038968,1.501590,0.639947,-2.106891,0,0,1.798073,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,-0.439332,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,0,0,acunetix,ataque,acunetix,ataque
2,-0.344116,-0.792588,-1.166558,-1.349828,-1.399612,-1.948322,-1.586249,-0.911927,-0.516389,-1.458594,-1.485467,-1.335894,-0.219459,-0.167237,1.281818,-0.254131,3.521867,-0.21968,1.334646,-0.305777,1.226991,1.226991,1.226991,1.226991,0.0,0.0,0.0,0.0,0.0,-0.344116,-0.792588,-1.166558,-1.349828,1.357180,0.370378,0.0,0.0,-0.445020,-1.312199,1622340800,1,0,0,acunetix-xss,0,-2.156880,-0.050626,-2.157928,-0.050626,0.0,0.0,-0.717871,-1.785545,2.330793,-2.552917,-2.552925,-2.076097,-2.268218,-0.278495,-0.395807,0.0,-1.980925,-1.183894,-0.600248,-1.773918,0.0,-0.862623,0.0,-1.781447,-0.030175,-0.095933,-0.32594,0.0,0.0,-1.661034,0.0,0.0,1.662439,-0.054765,-0.038968,1.501590,0.639947,-2.106891,0,0,1.798073,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,-0.439332,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,0,0,acunetix,ataque,acunetix,ataque
3,-0.344116,-0.792588,-1.166558,-1.349828,-1.399612,-1.948322,-1.586249,-0.911927,-0.516389,-1.458594,-1.485467,-1.335894,-0.219459,-0.167237,1.281818,-0.254131,3.521867,-0.21968,1.334646,-0.305777,1.226991,1.226991,1.226991,1.226991,0.0,0.0,0.0,0.0,0.0,-0.344116,-0.792588,-1.166558,-1.349828,1.357180,0.370378,0.0,0.0,-0.445020,-1.312199,1622340830,1,0,1,acunetix-xss,0,-2.150119,-0.050626,-2.156362,-0.050626,0.0,0.0,-0.717871,-1.785545,2.329312,-2.551112,-2.551404,-2.076097,-2.268515,-0.219979,-0.332239,0.0,-1.980739,-1.818800,-0.598517,-1.773918,0.0,-0.961148,0.0,-1.946499,-0.030175,-0.953455,-0.32594,0.0,0.0,-1.661034,0.0,0.0,1.794754,-0.054765,-0.038968,-0.999655,-0.167874,-2.119316,0,0,-0.864225,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,-0.439332,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,0,0,acunetix,ataque,acunetix,ataque
4,-0.344116,-0.792588,-1.166558,-1.349828,-1.399612,-1.948322,-1.586249,-0.911927,-0.516389,-1.458594,-1.485467,-1.335894,-0.219459,-0.167237,-0.703507,-0.254131,-0.283902,-0.21968,-0.685628,-0.305777,-0.701878,-0.701878,-0.701878,-0.701878,0.0,0.0,0.0,0.0,0.0,-0.344116,-0.792588,-1.166558,-1.349828,1.357180,0.370378,0.0,0.0,-0.445020,-1.312199,1622340840,1,1,1,acunetix-xss,0,-2.151231,-0.050626,-2.156583,-0.050626,0.0,0.0,-0.717871,-1.785545,2.315748,-2.534590,-2.534675,-2.076097,-2.267196,-0.187251,-0.291295,0.0,-1.979922,-1.341629,-0.582195,-1.773918,0.0,-0.535638,0.0,-1.911879,-0.030175,-0.554306,-0.32594,0.0,0.0,-1.637745,0.0,0.0,1.753711,-0.054765,-0.038968,1.215095,0.096855,-2.121909,0,0,-0.864225,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,-0.439332,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,0,0,acunetix,ataque,acunetix,ataque
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237431,1.279352,1.151345,1.152842,0.987183,0.637303,-0.734262,0.068844,-0.392576,-0.516389,0.482981,0.220738,0.202233,-0.219459,-0.167237,-0.703507,-0.254131,-0.283902,-0.21968,-0.685628,-0.305777,-0.701878,-0.701878,-0.701878,-0.701878,0.0,0.0,0.0,0.0,0.0,1.279352,1.151345,1.152842,0.987183,-0.358210,-0.957442,0.0,0.0,1.278473,1.156292,1625855270,0,0,0,normal,0,-2.200005,-0.050626,-2.166283,-0.050626,0.0,0.0,0.903720,-1.785545,0.549286,-0.521106,-0.521614,1.512772,0.638658,-2.083355,-1.968737,0.0,1.084640,-0.743777,-1.725064,1.460384,0.0,-1.764162,0.0,-1.804275,-0.030175,-0.392069,-0.32594,0.0,0.0,-1.661034,0.0,0.0,1.688377,-0.054765,-0.038968,0.100638,0.665868,-2.156306,4,0,0.919951,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,2.279867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,4,0,smod,ataque,smod,ataque
237432,1.434104,1.342169,1.349888,1.217251,0.637303,-0.734262,0.068844,-0.612513,-0.516389,0.572482,0.220738,-0.215796,-0.219459,-0.167237,1.281818,-0.254131,-0.283902,-0.21968,1.334646,-0.305777,1.226991,1.226991,1.226991,1.226991,0.0,0.0,0.0,0.0,0.0,1.434104,1.342169,1.349888,1.217251,-0.358210,-0.957442,0.0,0.0,1.428745,1.351252,1625855270,0,0,0,normal,0,-2.200005,-0.050626,-2.166283,-0.050626,0.0,0.0,0.903720,-1.785545,0.549286,-0.521106,-0.521614,1.512772,0.638658,-2.083355,-1.968737,0.0,1.084640,-0.743777,-1.725064,1.460384,0.0,-1.764162,0.0,-1.804275,-0.030175,-0.392069,-0.32594,0.0,0.0,-1.661034,0.0,0.0,1.688377,-0.054765,-0.038968,0.100638,0.665868,-2.156306,4,0,0.919951,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,2.279867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,4,0,smod,ataque,smod,ataque
237433,1.401013,1.300573,1.362160,1.229634,0.637303,-0.734262,0.068844,-0.612513,-0.516389,0.570178,0.220738,-0.196632,-0.219459,-0.167237,-0.703507,-0.254131,-0.283902,-0.21968,-0.685628,-0.305777,-0.701878,-0.701878,-0.701878,-0.701878,0.0,0.0,0.0,0.0,0.0,1.401013,1.300573,1.362160,1.229634,-0.358210,-0.957442,0.0,0.0,1.396418,1.363361,1625855270,0,0,0,normal,0,-2.200005,-0.050626,-2.166283,-0.050626,0.0,0.0,0.903720,-1.785545,0.549286,-0.521106,-0.521614,1.512772,0.638658,-2.083355,-1.968737,0.0,1.084640,-0.743777,-1.725064,1.460384,0.0,-1.764162,0.0,-1.804275,-0.030175,-0.392069,-0.32594,0.0,0.0,-1.661034,0.0,0.0,1.688377,-0.054765,-0.038968,0.100638,0.665868,-2.156306,4,0,0.919951,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,2.279867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,4,0,smod,ataque,smod,ataque
237434,1.229742,1.092053,1.125543,0.993808,0.637303,-0.622674,0.068844,-0.392576,-0.516389,0.577082,0.220738,-0.298129,-0.219459,-0.167237,-0.703507,-0.254131,-0.283902,-0.21968,-0.685628,-0.305777,-0.701878,-0.701878,-0.701878,-0.701878,0.0,0.0,0.0,0.0,0.0,1.229742,1.092053,1.125543,0.993808,-0.358210,-0.957442,0.0,0.0,1.230748,1.129198,1625855270,0,0,0,normal,0,-2.200005,-0.050626,-2.166283,-0.050626,0.0,0.0,0.903720,-1.785545,0.549286,-0.521106,-0.521614,1.512772,0.638658,-2.083355,-1.968737,0.0,1.084640,-0.743777,-1.725064,1.460384,0.0,-1.764162,0.0,-1.804275,-0.030175,-0.392069,-0.32594,0.0,0.0,-1.661034,0.0,0.0,1.688377,-0.054765,-0.038968,0.100638,0.665868,-2.156306,4,0,0.919951,-0.010054,-0.19716,0.0,-0.042891,0.0,0.0,-0.013459,0.0,0.0,-0.078928,0.0,2.279867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.287435,4,0,smod,ataque,smod,ataque


In [22]:
ret.to_csv('scada-sysmon-sysstat.csv', index=False)