In [1]:
# import library
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px 
from datetime import datetime
import os
%matplotlib inline

In [2]:
# tabel setting
# pd.set_option('display.max_columns',None)
# pd.set_option('display.max_rows',None)
# pd.set_option('display.precision', 2)
# pd.options.display.float_format = '{:,.2f}'.format

# Import Dataset

In [3]:
dataset_dir = './Dataset/NCC-2 Dataset Simultaneous Botnet Dataset/'
sensor1_path = dataset_dir + 'sensor1/'
sensor2_path = dataset_dir + 'sensor2/'
sensor2_path = dataset_dir + 'sensor2/'
allsensor_path = dataset_dir + 'all-sensors/'

In [4]:
os.listdir(sensor2_path)

['sensor2-activityAnalysisPerMinutes-botnetOnly.png',
 'sensor2-activityAnalysisPerMinutes.png',
 'sensor2-acumulatedonHour-botnetOnly.png',
 'sensor2-acumulatedonHour.png',
 'sensor2-description.txt',
 'sensor2.binetflow',
 'sensor2_botnet-only.binetflow',
 'sensor2_normal-only.binetflow']

# Sensor 2 .binetflow

In [5]:
# df1 -> all, df2 -> botnet only, df3 -> normal only
sensor2 = pd.read_csv(sensor2_path + 'sensor2.binetflow')

In [6]:
sensor2.shape

(5998133, 18)

In [7]:
sensor2.columns

Index(['StartTime', 'Dur', 'Proto', 'SrcAddr', 'Sport', 'Dir', 'DstAddr',
       'Dport', 'State', 'sTos', 'dTos', 'TotPkts', 'TotBytes', 'SrcBytes',
       'Label', 'ActivityLabel', 'BotnetName', 'SensorId'],
      dtype='object')

In [8]:
sensor2.dtypes

StartTime         object
Dur              float64
Proto             object
SrcAddr           object
Sport             object
Dir               object
DstAddr           object
Dport             object
State             object
sTos             float64
dTos             float64
TotPkts            int64
TotBytes           int64
SrcBytes           int64
Label             object
ActivityLabel      int64
BotnetName        object
SensorId           int64
dtype: object

In [9]:
sensor2.head()

Unnamed: 0,StartTime,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,Label,ActivityLabel,BotnetName,SensorId
0,2022-07-07 09:00:00,0.0,icmp,147.32.84.193,0x5303,->,147.32.96.69,,UR,0.0,,1,1066,1066,flow=From-Botnet-V51-ICMP,1,rbot,2
1,2022-07-07 09:00:00,2987.296875,udp,121.94.23.18,60729,<->,147.32.84.229,13363.0,CON,0.0,0.0,15,1455,308,flow=Background-UDP-Established,0,-,2
2,2022-07-07 09:00:00,3582.864502,tcp,147.32.84.229,443,<?>,212.103.28.2,41132.0,PA_PA,0.0,0.0,1021,96853,40471,flow=Background,0,-,2
3,2022-07-07 09:00:00,3454.736816,udp,110.67.100.220,54196,<->,147.32.84.229,13363.0,CON,0.0,0.0,19,3370,1544,flow=Background-UDP-Established,0,-,2
4,2022-07-07 09:00:00,3253.297363,udp,147.32.84.229,13363,->,94.221.209.28,30564.0,INT,0.0,,16,2384,2384,flow=Background-UDP-Attempt,0,-,2


In [10]:
sensor2_cat = sensor2.select_dtypes('object')
sensor2_num = sensor2.select_dtypes(['int64', 'float64'])

In [11]:
print('Kolom Categorical :', sensor2_cat.columns.value_counts().count())
print('Kolom Numerical :', sensor2_num.columns.value_counts().count())

Kolom Categorical : 10
Kolom Numerical : 8


In [12]:
del sensor2_cat, sensor2_num

## Check Missing Value

In [13]:
# Cek Missing Data
total_missing = sensor2.isnull().sum().sort_values(ascending=False)
percent_1 = sensor2.isnull().sum()/sensor2.isnull().count()*100
percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
missing_data = pd.concat([total_missing, percent_2], axis=1, keys=['Total Missing', '%'])
missing_data = missing_data.reset_index().rename(columns={'index': 'Column'})
missing_data

Unnamed: 0,Column,Total Missing,%
0,dTos,556961,9.3
1,sTos,29739,0.5
2,Sport,22675,0.4
3,Dport,22181,0.4
4,State,159,0.0
5,TotPkts,0,0.0
6,BotnetName,0,0.0
7,ActivityLabel,0,0.0
8,Label,0,0.0
9,SrcBytes,0,0.0


## Cek Kolom

### Start Time

In [14]:
pd.DataFrame(sensor2['StartTime'].value_counts()).reset_index()

Unnamed: 0,StartTime,count
0,2022-07-07 12:04:18,1233
1,2022-07-07 13:30:06,1227
2,2022-07-07 11:48:40,1205
3,2022-07-07 09:28:27,1113
4,2022-07-07 10:59:28,1109
...,...,...
28796,2022-07-07 15:01:34,18
28797,2022-07-07 14:56:56,18
28798,2022-07-07 17:00:00,18
28799,2022-07-07 16:00:51,17


### Dir

In [15]:
pd.DataFrame(sensor2['Dir'].value_counts()).reset_index()

Unnamed: 0,Dir,count
0,<->,4190332
1,->,1751397
2,<?>,22345
3,<-,21159
4,?>,11630
5,who,1262
6,<?,8


### State

In [16]:
pd.DataFrame(sensor2['State'].value_counts()).reset_index()

Unnamed: 0,State,count
0,CON,4187958
1,FSPA_FSPA,649220
2,INT,352118
3,SRPA_SPA,105642
4,S_,92170
...,...,...
362,TST,1
363,FSPAC_FA,1
364,_S,1
365,FSRA_SRPA,1


### BotNet Name

In [17]:
pd.DataFrame(sensor2['BotnetName'].value_counts()).reset_index()

Unnamed: 0,BotnetName,count
0,-,5634133
1,neris,267000
2,rbot,72000
3,virut,19000
4,menti,6000


### SrcAddr

In [18]:
pd.DataFrame(sensor2['SrcAddr'].value_counts()).reset_index()

Unnamed: 0,SrcAddr,count
0,147.32.84.138,1083485
1,147.32.84.59,659259
2,147.32.84.229,287452
3,147.32.85.25,130765
4,147.32.84.165,104001
...,...,...
861583,86.178.83.4,1
861584,101.142.153.231,1
861585,113.162.75.111,1
861586,77.76.169.34,1


### DstAddr

In [19]:
pd.DataFrame(sensor2['DstAddr'].value_counts()).reset_index()

Unnamed: 0,DstAddr,count
0,147.32.80.9,2167206
1,147.32.84.229,1786142
2,147.32.80.13,59973
3,147.32.86.165,33333
4,147.32.84.59,21910
...,...,...
230013,96.55.169.113,1
230014,168.95.5.79,1
230015,125.228.148.232,1
230016,69.249.68.41,1


### Proto

In [20]:
pd.DataFrame(sensor2['Proto'].value_counts()).reset_index()

Unnamed: 0,Proto,count
0,udp,4536360
1,tcp,1341742
2,icmp,104724
3,rtp,4993
4,igmp,4616
5,rtcp,4141
6,arp,1255
7,ipv6-icmp,160
8,esp,33
9,ipx/spx,33


### Sport

In [21]:
pd.DataFrame(sensor2['Sport'].value_counts()).reset_index()

Unnamed: 0,Sport,count
0,13363,286525
1,0x0303,53047
2,80,23775
3,1025,14252
4,123,12760
...,...,...
74395,0x0c97,1
74396,0xa204,1
74397,0x93b7,1
74398,0xbf31,1


### Dport

In [22]:
pd.DataFrame(sensor2['Dport'].value_counts()).reset_index()

Unnamed: 0,Dport,count
0,53,2196424
1,13363,1704419
2,80,775293
3,443,206234
4,6881,50875
...,...,...
78575,12615,1
78576,26641,1
78577,0x890e,1
78578,0xed05,1


### dTos

In [23]:
pd.DataFrame(sensor2['dTos'].value_counts()).reset_index()

Unnamed: 0,dTos,count
0,0.0,5440233
1,2.0,449
2,3.0,437
3,1.0,53


### sTos

In [24]:
pd.DataFrame(sensor2['sTos'].value_counts()).reset_index()

Unnamed: 0,sTos,count
0,0.0,5962442
1,2.0,2116
2,3.0,1847
3,1.0,1041
4,192.0,948


## Labeling

In [25]:
pd.DataFrame(sensor2['Label'].value_counts()).reset_index()

Unnamed: 0,Label,count
0,flow=To-Background-UDP-CVUT-DNS-Server,2112392
1,flow=Background-UDP-Established,1933263
2,flow=Background-TCP-Established,561900
3,flow=Background-Established-cmpgw-CVUT,321085
4,flow=Background-UDP-Attempt,293751
...,...,...
173,flow=From-Normal-V51-MatLab-Server,4
174,flow=From-Normal-V43-MatLab-Server,3
175,flow=From-Normal-V47-MatLab-Server,2
176,flow=Normal-V43-HTTP-windowsupdate,2


In [26]:
def categorize_label(label):
    label = label.lower()  
    if 'botnet' in label and 'spam' in label:
        return 2
    elif 'botnet' in label:
        return 1
    else:
        return 0

In [27]:
sensor2['Label'] = sensor2['Label'].apply(categorize_label)

In [28]:
sensor2.head()

Unnamed: 0,StartTime,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,Label,ActivityLabel,BotnetName,SensorId
0,2022-07-07 09:00:00,0.0,icmp,147.32.84.193,0x5303,->,147.32.96.69,,UR,0.0,,1,1066,1066,1,1,rbot,2
1,2022-07-07 09:00:00,2987.296875,udp,121.94.23.18,60729,<->,147.32.84.229,13363.0,CON,0.0,0.0,15,1455,308,0,0,-,2
2,2022-07-07 09:00:00,3582.864502,tcp,147.32.84.229,443,<?>,212.103.28.2,41132.0,PA_PA,0.0,0.0,1021,96853,40471,0,0,-,2
3,2022-07-07 09:00:00,3454.736816,udp,110.67.100.220,54196,<->,147.32.84.229,13363.0,CON,0.0,0.0,19,3370,1544,0,0,-,2
4,2022-07-07 09:00:00,3253.297363,udp,147.32.84.229,13363,->,94.221.209.28,30564.0,INT,0.0,,16,2384,2384,0,0,-,2


In [29]:
sensor2['Label'].value_counts()

Label
0    5634133
1     339000
2      25000
Name: count, dtype: int64

## Cleaning Data

### Drop Duplicates

In [30]:
sensor2 = sensor2.drop_duplicates()

In [31]:
sensor2.shape

(5996056, 18)

### Drop dTos

In [32]:
cek = sensor2[(sensor2['dTos'].isnull())]

In [33]:
cek['Label'].value_counts()

Label
0    492659
1     50891
2     13000
Name: count, dtype: int64

In [34]:
cek = cek[cek['Label'] == 0]

In [35]:
cek['Label'].value_counts()

Label
0    492659
Name: count, dtype: int64

In [36]:
sensor2 = sensor2.drop(cek.index)

In [37]:
# Cek Missing Data
total_missing = sensor2.isnull().sum().sort_values(ascending=False)
percent_1 = sensor2.isnull().sum()/sensor2.isnull().count()*100
percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
missing_data = pd.concat([total_missing, percent_2], axis=1, keys=['Total Missing', '%'])
missing_data = missing_data.reset_index().rename(columns={'index': 'Column'})
missing_data

Unnamed: 0,Column,Total Missing,%
0,dTos,63891,1.2
1,sTos,28441,0.5
2,Sport,16674,0.3
3,Dport,11233,0.2
4,State,158,0.0
5,TotPkts,0,0.0
6,BotnetName,0,0.0
7,ActivityLabel,0,0.0
8,Label,0,0.0
9,SrcBytes,0,0.0


### Drop sTos

In [38]:
cek = sensor2[(sensor2['sTos'].isnull())]

In [39]:
cek['Label'].value_counts()

Label
0    28441
Name: count, dtype: int64

In [40]:
sensor2 = sensor2.drop(cek.index)

In [41]:
# Cek Missing Data
total_missing = sensor2.isnull().sum().sort_values(ascending=False)
percent_1 = sensor2.isnull().sum()/sensor2.isnull().count()*100
percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
missing_data = pd.concat([total_missing, percent_2], axis=1, keys=['Total Missing', '%'])
missing_data = missing_data.reset_index().rename(columns={'index': 'Column'})
missing_data

Unnamed: 0,Column,Total Missing,%
0,dTos,63891,1.2
1,Dport,11159,0.2
2,Sport,679,0.0
3,State,158,0.0
4,StartTime,0,0.0
5,TotPkts,0,0.0
6,BotnetName,0,0.0
7,ActivityLabel,0,0.0
8,Label,0,0.0
9,SrcBytes,0,0.0


### Drop Dport

In [42]:
cek = sensor2[sensor2['Dport'].isnull()]

In [43]:
cek['Label'].value_counts()

Label
1    11140
0       19
Name: count, dtype: int64

In [44]:
cek = cek[cek['Label'] == 0]

In [45]:
cek['Label'].value_counts()

Label
0    19
Name: count, dtype: int64

In [46]:
sensor2 = sensor2.drop(cek.index)

In [47]:
# Cek Missing Data
total_missing = sensor2.isnull().sum().sort_values(ascending=False)
percent_1 = sensor2.isnull().sum()/sensor2.isnull().count()*100
percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
missing_data = pd.concat([total_missing, percent_2], axis=1, keys=['Total Missing', '%'])
missing_data = missing_data.reset_index().rename(columns={'index': 'Column'})
missing_data

Unnamed: 0,Column,Total Missing,%
0,dTos,63891,1.2
1,Dport,11140,0.2
2,Sport,665,0.0
3,State,158,0.0
4,StartTime,0,0.0
5,TotPkts,0,0.0
6,BotnetName,0,0.0
7,ActivityLabel,0,0.0
8,Label,0,0.0
9,SrcBytes,0,0.0


### Drop Sport

In [48]:
cek = sensor2[sensor2['Sport'].isnull()]

In [49]:
cek['Label'].value_counts()

Label
1    665
Name: count, dtype: int64

In [50]:
# cek = cek[cek['Label'] == 0]

In [51]:
# cek['Label'].value_counts()

In [52]:
# sensor2 = sensor2.drop(cek.index)

In [53]:
# Cek Missing Data
total_missing = sensor2.isnull().sum().sort_values(ascending=False)
percent_1 = sensor2.isnull().sum()/sensor2.isnull().count()*100
percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
missing_data = pd.concat([total_missing, percent_2], axis=1, keys=['Total Missing', '%'])
missing_data = missing_data.reset_index().rename(columns={'index': 'Column'})
missing_data

Unnamed: 0,Column,Total Missing,%
0,dTos,63891,1.2
1,Dport,11140,0.2
2,Sport,665,0.0
3,State,158,0.0
4,StartTime,0,0.0
5,TotPkts,0,0.0
6,BotnetName,0,0.0
7,ActivityLabel,0,0.0
8,Label,0,0.0
9,SrcBytes,0,0.0


### Drop State

In [54]:
cek = sensor2[sensor2['State'].isnull()]

In [55]:
cek['Label'].value_counts()

Label
1    158
Name: count, dtype: int64

In [56]:
# Cek Missing Data
total_missing = sensor2.isnull().sum().sort_values(ascending=False)
percent_1 = sensor2.isnull().sum()/sensor2.isnull().count()*100
percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
missing_data = pd.concat([total_missing, percent_2], axis=1, keys=['Total Missing', '%'])
missing_data = missing_data.reset_index().rename(columns={'index': 'Column'})
missing_data

Unnamed: 0,Column,Total Missing,%
0,dTos,63891,1.2
1,Dport,11140,0.2
2,Sport,665,0.0
3,State,158,0.0
4,StartTime,0,0.0
5,TotPkts,0,0.0
6,BotnetName,0,0.0
7,ActivityLabel,0,0.0
8,Label,0,0.0
9,SrcBytes,0,0.0


## Imputing

### dTos

In [57]:
sensor2['dTos'].value_counts()

dTos
0.0    5410107
2.0        449
3.0        437
1.0         53
Name: count, dtype: int64

In [58]:
cek = sensor2[sensor2['dTos'].isnull()]

In [59]:
cek['Label'].value_counts()

Label
1    50891
2    13000
Name: count, dtype: int64

In [60]:
# Impute with modes
mode_value1 = sensor2[sensor2['Label'] == 1]['dTos'].mode()[0]
mode_value2 = sensor2[sensor2['Label'] == 2]['dTos'].mode()[0]

sensor2.loc[(sensor2['dTos'].isnull()) & (sensor2['Label'] == 1), 'dTos'] = mode_value1
sensor2.loc[(sensor2['dTos'].isnull()) & (sensor2['Label'] == 2), 'dTos'] = mode_value2

In [61]:
# Cek Missing Data
total_missing = sensor2.isnull().sum().sort_values(ascending=False)
percent_1 = sensor2.isnull().sum()/sensor2.isnull().count()*100
percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
missing_data = pd.concat([total_missing, percent_2], axis=1, keys=['Total Missing', '%'])
missing_data = missing_data.reset_index().rename(columns={'index': 'Column'})
missing_data

Unnamed: 0,Column,Total Missing,%
0,Dport,11140,0.2
1,Sport,665,0.0
2,State,158,0.0
3,StartTime,0,0.0
4,TotPkts,0,0.0
5,BotnetName,0,0.0
6,ActivityLabel,0,0.0
7,Label,0,0.0
8,SrcBytes,0,0.0
9,TotBytes,0,0.0


### Dport

In [62]:
sensor2['Dport'].value_counts()

Dport
53        2194973
13363     1554370
80         764164
443        198224
6667        40466
           ...   
56586           1
51687           1
55362           1
51422           1
0x00f0          1
Name: count, Length: 57575, dtype: int64

In [63]:
cek = sensor2[sensor2['Dport'].isnull()]

In [64]:
cek['Label'].value_counts()

Label
1    11140
Name: count, dtype: int64

In [65]:
# Impute with modes
mode_value = sensor2[sensor2['Label'] == 1]['Dport'].mode()[0]
sensor2['Dport'] = sensor2['Dport'].fillna(mode_value)

### Sport

In [66]:
sensor2['Sport'].value_counts()

Sport
13363     227604
1025       13915
123        12318
1027        9965
80          8450
           ...  
0x35b9         1
0x14ff         1
0xe648         1
0xd1cf         1
0xe3d4         1
Name: count, Length: 74025, dtype: int64

In [67]:
cek = sensor2[sensor2['Sport'].isnull()]

In [68]:
cek['Label'].value_counts()

Label
1    665
Name: count, dtype: int64

In [69]:
# Impute with modes
mode_value = sensor2[sensor2['Label'] == 1]['Sport'].mode()[0]
sensor2['Sport'] = sensor2['Sport'].fillna(mode_value)

### State

In [70]:
sensor2['State'].value_counts()

State
CON            4187153
FSPA_FSPA       649010
SRPA_SPA        105224
SRPA_FSPA        87444
FSA_FSA          67662
                ...   
RA_SPA               1
SPAEC_FSRPA          1
SR_FPA               1
SA_FRA               1
RTS                  1
Name: count, Length: 333, dtype: int64

In [71]:
cek = sensor2[sensor2['State'].isnull()]

In [72]:
cek['Label'].value_counts()

Label
1    158
Name: count, dtype: int64

In [73]:
# Impute with modes
mode_value = sensor2[sensor2['Label'] == 1]['State'].mode()[0]
sensor2['State'] = sensor2['State'].fillna(mode_value)

In [74]:
# Cek Missing Data
total_missing = sensor2.isnull().sum().sort_values(ascending=False)
percent_1 = sensor2.isnull().sum()/sensor2.isnull().count()*100
percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
missing_data = pd.concat([total_missing, percent_2], axis=1, keys=['Total Missing', '%'])
missing_data = missing_data.reset_index().rename(columns={'index': 'Column'})
missing_data

Unnamed: 0,Column,Total Missing,%
0,StartTime,0,0.0
1,Dur,0,0.0
2,BotnetName,0,0.0
3,ActivityLabel,0,0.0
4,Label,0,0.0
5,SrcBytes,0,0.0
6,TotBytes,0,0.0
7,TotPkts,0,0.0
8,dTos,0,0.0
9,sTos,0,0.0


In [75]:
# sensor2.to_csv('./Dataset/sensor2_clean.csv', index=False)

## Categorical Encoding

In [55]:
# df.to_csv('./Dataset/df_clean.csv')

In [76]:
dataset = pd.DataFrame(sensor2[[
    'StartTime',
    'SrcAddr',
    'DstAddr',
    'Sport',
    'Dport',
    'sTos',
    'dTos',
    'Dir',
    'State',
    'Proto',
    'Dur',
    'TotPkts',
    'TotBytes',
    'SrcBytes',
    'BotnetName',
    'Label',
    ]])

### Start Time

In [77]:
dataset['StartTime'] = pd.to_datetime(dataset['StartTime'], errors='coerce')

dataset['StartTimeHour'] = dataset['StartTime'].dt.hour
dataset['StartTimeMinute'] = dataset['StartTime'].dt.minute
dataset['StartTimeSecond'] = dataset['StartTime'].dt.second
dataset = dataset.drop(columns=['StartTime'])

### SrcAddr & DstAddr & Sport & Dport

In [78]:
# Frequency Encoding
def frequency_encoding(df):
    freq_map = df.value_counts().to_dict()  # Create frequency map
    df = df.map(freq_map)  # Apply frequency encoding
    return df

In [79]:
dataset['SrcAddr'] = frequency_encoding(dataset['SrcAddr'])
dataset['DstAddr'] = frequency_encoding(dataset['DstAddr'])
dataset['Sport'] = frequency_encoding(dataset['Sport'])
dataset['Dport'] = frequency_encoding(dataset['Dport'])

### Dir & State & dTos & sTos & Proto

In [80]:
# Label Encoding
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
dataset['Dir'] = le.fit_transform(dataset['Dir'].astype(str))
dataset['State'] = le.fit_transform(dataset['State'].astype(str))
dataset['sTos'] = le.fit_transform(dataset['sTos'].astype(str))
dataset['dTos'] = le.fit_transform(dataset['dTos'].astype(str))
dataset['BotnetName'] = le.fit_transform(dataset['BotnetName'].astype(str))
dataset['Proto'] = le.fit_transform(dataset['Proto'].astype(str))

In [81]:
dataset.shape

(5474937, 18)

In [82]:
dataset.head()

Unnamed: 0,SrcAddr,DstAddr,Sport,Dport,sTos,dTos,Dir,State,Proto,Dur,TotPkts,TotBytes,SrcBytes,BotnetName,Label,StartTimeHour,StartTimeMinute,StartTimeSecond
0,27893,19372,1,775304,0,0,0,320,0,0.0,1,1066,1066,3,1,9,0,0
1,1,1615327,143,1554370,0,0,3,16,4,2987.296875,15,1455,308,0,0,9,0,0
2,228416,3,1698,4,0,0,5,178,3,3582.864502,1021,96853,40471,0,0,9,0,0
3,4,1615327,151,1554370,0,0,3,16,4,3454.736816,19,3370,1544,0,0,9,0,0
5,2,1615327,139,1554370,0,0,3,16,4,3279.283447,9,2209,1939,0,0,9,0,0


In [87]:
dataset['isBotnet'] = dataset['Label'].apply(lambda x: 1 if x == 1 or x == 2 else 0)
dataset['isSpam'] = dataset['Label'].apply(lambda x: 1 if x == 2 else 0)

In [88]:
dataset.columns

Index(['SrcAddr', 'DstAddr', 'Sport', 'Dport', 'sTos', 'dTos', 'Dir', 'State',
       'Proto', 'Dur', 'TotPkts', 'TotBytes', 'SrcBytes', 'BotnetName',
       'Label', 'StartTimeHour', 'StartTimeMinute', 'StartTimeSecond',
       'isBotnet', 'isSpam'],
      dtype='object')

In [89]:
# dataset.to_csv('./Dataset/sensor2_clean.csv', index=False)