In [1]:
# import library
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px 
from datetime import datetime
import os
import tensorflow as tf
%matplotlib inline

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Import Dataset

In [2]:
# Directory the sensor CSV files are read from.
dataset_dir = '../Dataset/'

# The prediction cells save their results under ./Output/ with np.savetxt, which
# does not create missing directories. Create it up front so a long prediction
# run cannot fail at its final save step.
os.makedirs('./Output', exist_ok=True)

In [3]:
# Sensor 1: training frames for the isBotnet and isSpam stages, plus the test frame
train1_isbotnet = pd.read_csv(f"{dataset_dir}sensor1_isbotnet.csv")
train1_isspam = pd.read_csv(f"{dataset_dir}sensor1_isspam.csv")
sensor1_test = pd.read_csv(f"{dataset_dir}sensor1_test.csv")

In [4]:
sensor1_test.head()

Unnamed: 0,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,Label,StartTimeHour,StartTimeMinute,StartTimeSecond,isBotnet,isSpam
0,37.096378,3,138,74,0,2748,653182,144,0,0,38,25249,1455,0,14,23,33,0,0
1,0.001388,4,1,40,3,1326350,1274027,16,0,0,2,138,78,0,15,0,42,0,0
2,0.000382,4,527737,71,3,1823076,1852324,16,0,0,2,197,76,0,10,45,3,0,0
3,2799.228027,4,1,22,3,1326350,1274027,16,0,0,6,1170,989,0,12,19,54,0,0
4,0.004994,4,11142,137,3,1823076,1852324,16,0,0,2,196,73,0,11,33,47,0,0


In [5]:
# Sensor 2: training frames for the isBotnet and isSpam stages, plus the test frame
train2_isbotnet = pd.read_csv(f"{dataset_dir}sensor2_isbotnet.csv")
train2_isspam = pd.read_csv(f"{dataset_dir}sensor2_isspam.csv")
sensor2_test = pd.read_csv(f"{dataset_dir}sensor2_test.csv")

In [6]:
sensor2_test.head()

Unnamed: 0,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,Label,StartTimeHour,StartTimeMinute,StartTimeSecond,isBotnet,isSpam
0,0.000657,4,1,96,3,1615327,1554370,16,0,0,2,136,76,0,9,35,50,0,0
1,0.000191,4,1082471,146,3,2162514,2194973,16,0,0,2,214,81,0,13,9,40,0,0
2,0.002432,4,1,128,3,1615327,1554370,16,0,0,2,535,475,0,11,31,53,0,0
3,0.000232,4,617063,72,3,2162514,2194973,16,0,0,2,298,80,0,9,34,38,0,0
4,1310.473267,4,2,104,3,1615327,1554370,16,0,0,6,1197,1016,0,11,34,24,0,0


In [7]:
# Sensor 3: training frames for the isBotnet and isSpam stages, plus the test frame
train3_isbotnet = pd.read_csv(f"{dataset_dir}sensor3_isbotnet.csv")
train3_isspam = pd.read_csv(f"{dataset_dir}sensor3_isspam.csv")
sensor3_test = pd.read_csv(f"{dataset_dir}sensor3_test.csv")

In [8]:
sensor3_test.head()

Unnamed: 0,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,Label,StartTimeHour,StartTimeMinute,StartTimeSecond,isBotnet,isSpam
0,0.000179,4,136840,121,3,1615276,1628176,9,0,0,2,208,79,0,16,11,14,0,0
1,1277.22998,4,113768,113234,3,3,4,9,0,0,4,747,627,0,13,13,7,0,0
2,0.000302,4,657086,63,3,1615276,1628176,9,0,0,2,214,81,0,13,27,6,0,0
3,0.000228,4,587582,141,3,1615276,1628176,9,0,0,2,322,83,0,10,43,21,0,0
4,0.00042,4,83499,37,3,1615276,1628176,9,0,0,2,321,84,0,14,21,21,0,0


# Modeling

In [9]:
def predictionStack(clf1, clf2, X_test, botnet_features, spam_features):
    """Label every row of ``X_test`` with the two-stage botnet -> spam stack.

    Stage 1 (``clf1``) is applied to the columns selected by
    ``botnet_features``. Rows it labels 1 are then passed to stage 2
    (``clf2``) using the columns selected by ``spam_features``. The two binary
    answers are merged into a single label per row:

    * ``0`` -- ``clf1`` predicted 0
    * ``1`` -- ``clf1`` predicted 1 and ``clf2`` predicted 0
    * ``2`` -- ``clf1`` predicted 1 and ``clf2`` predicted 1

    Each classifier is called once on a whole frame rather than once per row,
    which avoids building a one-row DataFrame for every test sample.

    Parameters
    ----------
    clf1, clf2 : objects with a ``predict(X)`` method
        Fitted binary classifiers returning labels 0/1.
    X_test : pandas.DataFrame
        Rows to classify. Column order must match the order the masks refer to.
    botnet_features, spam_features : sequence of 0/1 or bool
        One entry per column of ``X_test``; truthy entries mark the columns
        fed to ``clf1`` and ``clf2`` respectively.

    Returns
    -------
    list of int
        One label (0, 1 or 2) per row of ``X_test``, in row order.

    Raises
    ------
    ValueError
        If either classifier returns a label other than 0 or 1. The previous
        row-wise loop either hit an unbound ``pred`` or silently reused the
        label of the preceding row in that situation.
    """
    # Nothing to classify; also avoids calling predict() on an empty frame.
    if len(X_test) == 0:
        return []

    botnet_columns = X_test.columns[np.asarray(botnet_features, dtype=bool)]
    spam_columns = X_test.columns[np.asarray(spam_features, dtype=bool)]

    # Stage 1: botnet vs. normal for every row at once.
    stage1 = np.asarray(clf1.predict(X_test.loc[:, botnet_columns]))
    is_botnet = stage1 == 1
    if not np.all(is_botnet | (stage1 == 0)):
        raise ValueError("clf1 returned labels other than 0 and 1")

    # Rows that stage 1 called normal keep the default label 0.
    y_pred = np.zeros(len(X_test), dtype=int)

    # Stage 2: only rows flagged as botnet are checked for spam.
    if is_botnet.any():
        stage2 = np.asarray(clf2.predict(X_test.loc[is_botnet, spam_columns]))
        is_spam = stage2 == 1
        if not np.all(is_spam | (stage2 == 0)):
            raise ValueError("clf2 returned labels other than 0 and 1")
        y_pred[is_botnet] = np.where(is_spam, 2, 1)

    # Plain Python ints, matching what the row-wise implementation returned.
    return y_pred.tolist()

# Sensor 1

In [49]:
# Sensor 1, stage 1: isBotnet is the target, every other column is a feature
y_botnet = train1_isbotnet['isBotnet']
X_botnet = train1_isbotnet.drop('isBotnet', axis=1)

In [50]:
# Sensor 1, stage 2: isSpam is the target, every other column is a feature
y_spam = train1_isspam['isSpam']
X_spam = train1_isspam.drop('isSpam', axis=1)

In [51]:
# Sensor 1 evaluation data: Label is the reference the predictions are scored against;
# it and the two binary flag columns are removed from the features
y_test = sensor1_test['Label']
X_test = sensor1_test.drop(['Label', 'isBotnet', 'isSpam'], axis=1)

## Without BPSO

### isBotnet

In [52]:
# Baseline without BPSO: every mask bit is 1, so all columns of X_botnet are kept
selected_features_botnet = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_botnet = np.asarray(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet[selected_columns_botnet]

In [53]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [54]:
# Baseline without BPSO: every mask bit is 1, so all columns of X_spam are kept
selected_features_spam = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_spam = np.asarray(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam[selected_columns_spam]

In [55]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [56]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor1_nobpso_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [57]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   0.99999   1.00000   1093787
           1    0.99980   0.99994   0.99987     34898
           2    1.00000   1.00000   1.00000      1528

    accuracy                        0.99999   1130213
   macro avg    0.99993   0.99998   0.99996   1130213
weighted avg    0.99999   0.99999   0.99999   1130213



In [58]:
# Per-class precision / recall / F1 as a table; the aggregate rows
# (accuracy, macro avg, weighted avg) are removed and the columns abbreviated.
report = classification_report(
    y_test,
    y_pred,
    target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'],
    output_dict=True,
)
df_metrics = (
    pd.DataFrame(report)
    .T
    .loc[:, ['precision', 'recall', 'f1-score']]
    .drop(index=['accuracy', 'macro avg', 'weighted avg'])
    .rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
)
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999998,0.999994,0.999996
Botnet Non SPAM,0.999799,0.999943,0.999871
Botnet SPAM,1.0,1.0,1.0


In [59]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999920368992393


In [60]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1093780       7       0]
 [      2   34896       0]
 [      0       0    1528]]


## 10 Particles

### isBotnet

In [10]:
selected_features_botnet = [1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [11]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [12]:
selected_features_spam = [0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [13]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [None]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor1_10_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [None]:
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1093787
           1    0.99997   1.00000   0.99999     34898
           2    1.00000   1.00000   1.00000      1528

    accuracy                        1.00000   1130213
   macro avg    0.99999   1.00000   1.00000   1130213
weighted avg    1.00000   1.00000   1.00000   1130213



In [None]:
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,0.999999,1.0
Botnet Non SPAM,0.999971,1.0,0.999986
Botnet SPAM,1.0,1.0,1.0


In [21]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999991152110266


In [22]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1093786       1       0]
 [      0   34898       0]
 [      0       0    1528]]


## 15 Particles

### isBotnet

In [23]:
selected_features_botnet = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [24]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [25]:
selected_features_spam = [1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [26]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction


In [27]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor1_15_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [33]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1093787
           1    0.99997   0.99997   0.99997     34898
           2    1.00000   1.00000   1.00000      1528

    accuracy                        1.00000   1130213
   macro avg    0.99999   0.99999   0.99999   1130213
weighted avg    1.00000   1.00000   1.00000   1130213



In [34]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999999,0.999999,0.999999
Botnet Non SPAM,0.999971,0.999971,0.999971
Botnet SPAM,1.0,1.0,1.0


In [35]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999982304220532


In [36]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1093786       1       0]
 [      1   34897       0]
 [      0       0    1528]]


## 20 Particles

### isBotnet

In [37]:
selected_features_botnet = [1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [38]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [39]:
selected_features_spam = [1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [40]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [41]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor1_20_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [42]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1093787
           1    0.99994   1.00000   0.99997     34898
           2    1.00000   1.00000   1.00000      1528

    accuracy                        1.00000   1130213
   macro avg    0.99998   1.00000   0.99999   1130213
weighted avg    1.00000   1.00000   1.00000   1130213



In [43]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,0.999998,0.999999
Botnet Non SPAM,0.999943,1.0,0.999971
Botnet SPAM,1.0,1.0,1.0


In [44]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999982304220532


In [45]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1093785       2       0]
 [      0   34898       0]
 [      0       0    1528]]


# Sensor 2

In [10]:
# Sensor 2, stage 1: isBotnet is the target, every other column is a feature
y_botnet = train2_isbotnet['isBotnet']
X_botnet = train2_isbotnet.drop('isBotnet', axis=1)

In [11]:
# Sensor 2, stage 2: isSpam is the target, every other column is a feature
y_spam = train2_isspam['isSpam']
X_spam = train2_isspam.drop('isSpam', axis=1)

In [12]:
# Sensor 2 evaluation data: Label is the reference the predictions are scored against;
# it and the two binary flag columns are removed from the features
y_test = sensor2_test['Label']
X_test = sensor2_test.drop(['Label', 'isBotnet', 'isSpam'], axis=1)

## Without BPSO

### isBotnet

In [40]:
selected_features_botnet = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [41]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [42]:
selected_features_spam = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [43]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [44]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor2_nobpso_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [45]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.99998   0.99998   0.99998   1278259
           1    0.99968   0.99973   0.99970     84177
           2    1.00000   0.99984   0.99992      6299

    accuracy                        0.99996   1368735
   macro avg    0.99989   0.99985   0.99987   1368735
weighted avg    0.99996   0.99996   0.99996   1368735



In [46]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999981,0.999979,0.99998
Botnet Non SPAM,0.999679,0.999727,0.999703
Botnet SPAM,1.0,0.999841,0.999921


In [47]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999627393176912


In [48]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1278232      27       0]
 [     23   84154       0]
 [      1       0    6298]]


## 10 Particles

### isBotnet

In [13]:
selected_features_botnet = [1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [14]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [15]:
selected_features_spam = [0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [16]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [17]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor2_10_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [18]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1278259
           1    1.00000   1.00000   1.00000     84177
           2    1.00000   1.00000   1.00000      6299

    accuracy                        1.00000   1368735
   macro avg    1.00000   1.00000   1.00000   1368735
weighted avg    1.00000   1.00000   1.00000   1368735



In [19]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,1.0,1.0
Botnet Non SPAM,1.0,1.0,1.0
Botnet SPAM,1.0,1.0,1.0


In [20]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [21]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1278259       0       0]
 [      0   84177       0]
 [      0       0    6299]]


## 15 Particles

### isBotnet

In [22]:
selected_features_botnet = [0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [23]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [24]:
selected_features_spam = [1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [25]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction


In [26]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor2_15_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [27]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1278259
           1    1.00000   1.00000   1.00000     84177
           2    1.00000   1.00000   1.00000      6299

    accuracy                        1.00000   1368735
   macro avg    1.00000   1.00000   1.00000   1368735
weighted avg    1.00000   1.00000   1.00000   1368735



In [28]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,1.0,1.0
Botnet Non SPAM,1.0,1.0,1.0
Botnet SPAM,1.0,1.0,1.0


In [29]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [30]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1278259       0       0]
 [      0   84177       0]
 [      0       0    6299]]


## 20 Particles

### isBotnet

In [31]:
selected_features_botnet = [1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [32]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [33]:
selected_features_spam = [1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [34]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [35]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor2_20_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [36]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1278259
           1    1.00000   0.99993   0.99996     84177
           2    1.00000   1.00000   1.00000      6299

    accuracy                        1.00000   1368735
   macro avg    1.00000   0.99998   0.99999   1368735
weighted avg    1.00000   1.00000   1.00000   1368735



In [37]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999995,1.0,0.999998
Botnet Non SPAM,1.0,0.999929,0.999964
Botnet SPAM,1.0,1.0,1.0


In [38]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999956163903166


In [39]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1278259       0       0]
 [      6   84171       0]
 [      0       0    6299]]


# Sensor 3

In [61]:
# Sensor 3, stage 1: isBotnet is the target, every other column is a feature
y_botnet = train3_isbotnet['isBotnet']
X_botnet = train3_isbotnet.drop('isBotnet', axis=1)

In [62]:
# Sensor 3, stage 2: isSpam is the target, every other column is a feature
y_spam = train3_isspam['isSpam']
X_spam = train3_isspam.drop('isSpam', axis=1)

In [63]:
# Sensor 3 evaluation data: Label is the reference the predictions are scored against;
# it and the two binary flag columns are removed from the features
y_test = sensor3_test['Label']
X_test = sensor3_test.drop(['Label', 'isBotnet', 'isSpam'], axis=1)

## Without BPSO

### isBotnet

In [64]:
selected_features_botnet = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [65]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [66]:
selected_features_spam = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [67]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [68]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor3_nobpso_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [69]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.99998   0.99996   0.99997    825713
           1    0.99957   0.99972   0.99965     67609
           2    1.00000   1.00000   1.00000      5753

    accuracy                        0.99995    899075
   macro avg    0.99985   0.99989   0.99987    899075
weighted avg    0.99995   0.99995   0.99995    899075



In [70]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999977,0.999965,0.999971
Botnet Non SPAM,0.999571,0.999719,0.999645
Botnet SPAM,1.0,1.0,1.0


In [71]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999466117954564


In [72]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[825684     29      0]
 [    19  67590      0]
 [     0      0   5753]]


## 10 Particles

### isBotnet

In [13]:
selected_features_botnet = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [14]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [15]:
selected_features_spam = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [16]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [17]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor3_10_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [18]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000    825713
           1    1.00000   1.00000   1.00000     67609
           2    1.00000   1.00000   1.00000      5753

    accuracy                        1.00000    899075
   macro avg    1.00000   1.00000   1.00000    899075
weighted avg    1.00000   1.00000   1.00000    899075



In [19]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,1.0,1.0
Botnet Non SPAM,1.0,1.0,1.0
Botnet SPAM,1.0,1.0,1.0


In [20]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [21]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[825713      0      0]
 [     0  67609      0]
 [     0      0   5753]]


## 15 Particles

### isBotnet

In [23]:
selected_features_botnet = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [24]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [25]:
selected_features_spam = [1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [26]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction


In [27]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor3_15_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [28]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   0.99999   1.00000    825713
           1    0.99993   0.99997   0.99995     67609
           2    1.00000   1.00000   1.00000      5753

    accuracy                        0.99999    899075
   macro avg    0.99997   0.99999   0.99998    899075
weighted avg    0.99999   0.99999   0.99999    899075



In [29]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999998,0.999994,0.999996
Botnet Non SPAM,0.999926,0.99997,0.999948
Botnet SPAM,1.0,1.0,1.0


In [30]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999922142201707


In [31]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[825708      5      0]
 [     2  67607      0]
 [     0      0   5753]]


## 20 Particles

### isBotnet

In [37]:
selected_features_botnet = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [38]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [39]:
selected_features_spam = [0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [40]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [41]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('./Output/sensor3_20_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [42]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000    825713
           1    1.00000   1.00000   1.00000     67609
           2    1.00000   1.00000   1.00000      5753

    accuracy                        1.00000    899075
   macro avg    1.00000   1.00000   1.00000    899075
weighted avg    1.00000   1.00000   1.00000    899075



In [43]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,1.0,1.0
Botnet Non SPAM,1.0,1.0,1.0
Botnet SPAM,1.0,1.0,1.0


In [44]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [45]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[825713      0      0]
 [     0  67609      0]
 [     0      0   5753]]
