In [1]:
# import library
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px 
from datetime import datetime
import os
import tensorflow as tf
%matplotlib inline

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Import Dataset

In [2]:
# Base directory (relative path) that the sensor CSV files in the cells below are read from.
dataset_dir = '../../Dataset/'

In [3]:
# Sensor 1: isBotnet training set, isSpam training set, encoded test set (read in that order)
train1_isbotnet, train1_isspam, sensor1_test = (
    pd.read_csv(f'{dataset_dir}{csv_name}')
    for csv_name in ('sensor1_isbotnet.csv', 'sensor1_isspam.csv', 'test1_encoded.csv')
)

In [4]:
sensor1_test.head()

Unnamed: 0,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,Label,StartTimeHour,StartTimeMinute,StartTimeSecond,isBotnet,isSpam
0,0.000921,4,0,102,3,994426,955188,15,0,0,2,131,71,0,13,54,5,0,0
1,0.000224,4,74229,49,3,1367792,1389751,15,0,0,2,327,85,0,12,35,37,0,0
2,0.00095,4,8,13,3,994426,955188,15,0,0,2,137,77,0,14,31,9,0,0
3,0.00026,4,5610,45,3,1367792,1389751,15,0,0,2,345,69,0,14,44,12,0,0
4,0.055122,3,395832,107,0,299,489667,112,0,0,10,1435,795,0,9,37,22,0,0


In [3]:
# Sensor 2: isBotnet training set, isSpam training set, encoded test set (read in that order)
train2_isbotnet, train2_isspam, sensor2_test = (
    pd.read_csv(f'{dataset_dir}{csv_name}')
    for csv_name in ('sensor2_isbotnet.csv', 'sensor2_isspam.csv', 'test2_encoded.csv')
)

In [4]:
sensor2_test.head()

Unnamed: 0,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,Label,StartTimeHour,StartTimeMinute,StartTimeSecond,isBotnet,isSpam
0,0.000317,4,811174,72,3,1621520,1645689,12,0,0,2,216,83,0,9,15,18,0,0
1,0.000452,4,462414,84,3,1621520,1645689,12,0,0,2,573,80,0,12,47,24,0,0
2,0.000286,4,811174,194,3,1621520,1645689,12,0,0,2,214,81,0,11,31,33,0,0
3,0.001396,1,0,29,3,1212234,1166534,12,0,0,2,425,83,0,9,21,4,0,0
4,0.000475,4,811174,105,3,1621520,1645689,12,0,0,2,214,81,0,14,31,2,0,0


In [3]:
# Sensor 3: isBotnet training set, isSpam training set, encoded test set (read in that order)
train3_isbotnet, train3_isspam, sensor3_test = (
    pd.read_csv(f'{dataset_dir}{csv_name}')
    for csv_name in ('sensor3_isbotnet.csv', 'sensor3_isspam.csv', 'test3_encoded.csv')
)

In [4]:
sensor3_test.head()

Unnamed: 0,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,Label,StartTimeHour,StartTimeMinute,StartTimeSecond,isBotnet,isSpam
0,0.015153,4,32986,36,3,1211113,1220778,8,0,0,2,335,73,0,10,37,22,0,0
1,0.000743,4,1,12,3,197730,185254,8,0,0,2,138,77,0,9,41,9,0,0
2,0.166308,4,731,246,3,791,732,8,0,0,2,485,145,1,9,54,4,1,0
3,104.185165,4,1,85,3,338987,318377,8,0,0,4,499,364,0,13,16,26,0,0
4,0.327292,3,440828,32,0,11057,469208,57,0,0,9,1535,699,0,10,9,27,0,0


# Modeling

In [5]:
def predictionStack(clf1, clf2, X_test, botnet_features, spam_features):
    """Label every row of ``X_test`` with a two-stage classifier stack.

    Stage 1 (``clf1``) is applied to all rows. Only the rows it labels ``1``
    are passed on to stage 2 (``clf2``). Both stages are evaluated in a single
    batch ``predict`` call each instead of once per row.

    Parameters
    ----------
    clf1, clf2 : objects with a ``predict(X)`` method
        Fitted stage-1 and stage-2 classifiers; each must return only the
        labels 0 and 1.
    X_test : pandas.DataFrame
        Rows to classify. Its columns are the candidates both masks select from.
    botnet_features, spam_features : sequence of 0/1 (or bool)
        One flag per column of ``X_test``; truthy entries mark the columns fed
        to ``clf1`` and ``clf2`` respectively.

    Returns
    -------
    list of int
        One label per row, in row order:
        0 -> ``clf1`` predicted 0,
        1 -> ``clf1`` predicted 1 and ``clf2`` predicted 0,
        2 -> ``clf1`` predicted 1 and ``clf2`` predicted 1.

    Raises
    ------
    ValueError
        If either classifier returns a label other than 0 or 1. The previous
        row-by-row implementation silently reused the label of the preceding
        row in that situation (or failed with ``NameError`` on the first row).
    """
    n_rows = len(X_test)
    if n_rows == 0:
        # Nothing to classify; estimators typically reject empty input.
        return []

    # Stage 1: one batch prediction over the columns enabled in the botnet mask.
    botnet_mask = np.array(botnet_features, dtype=bool)
    botnet_columns = X_test.columns[botnet_mask]
    pred1 = np.asarray(clf1.predict(X_test.loc[:, botnet_columns]))
    if not np.isin(pred1, (0, 1)).all():
        raise ValueError("clf1 returned a label other than 0 or 1")
    is_botnet = pred1 == 1

    y_pred = np.zeros(n_rows, dtype=int)

    # Stage 2 is consulted only for rows stage 1 flagged, and only if any exist
    # (so the spam mask is never touched when it is not needed).
    if is_botnet.any():
        spam_mask = np.array(spam_features, dtype=bool)
        spam_columns = X_test.columns[spam_mask]
        pred2 = np.asarray(clf2.predict(X_test.loc[is_botnet, spam_columns]))
        if not np.isin(pred2, (0, 1)).all():
            raise ValueError("clf2 returned a label other than 0 or 1")
        # Stage-2 label 0 -> final 1, stage-2 label 1 -> final 2.
        y_pred[is_botnet] = 1 + (pred2 == 1)

    # Plain Python ints in a list, matching the original return type.
    return y_pred.tolist()

# Sensor 1

In [6]:
# Stage-1 training data for Sensor 1: target column first, then everything else as features
y_botnet = train1_isbotnet.loc[:, 'isBotnet']
X_botnet = train1_isbotnet.drop('isBotnet', axis=1)

In [7]:
# Stage-2 training data for Sensor 1: target column first, then everything else as features
y_spam = train1_isspam.loc[:, 'isSpam']
X_spam = train1_isspam.drop('isSpam', axis=1)

In [8]:
# Sensor 1 test set: 'Label' is the evaluation target; the three label-like
# columns are removed from the feature frame.
y_test = sensor1_test.loc[:, 'Label']
X_test = sensor1_test.drop(['Label', 'isBotnet', 'isSpam'], axis=1)

## Without BPSO

### isBotnet

In [9]:
# Baseline (no BPSO): all 16 feature flags are switched on
selected_features_botnet = [1] * 16
selected_mask_botnet = np.asarray(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet[selected_columns_botnet]

In [10]:
# Stage-1 tree, trained on the selected isBotnet feature columns (seed fixed at 42)
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X=X_selected_botnet, y=y_botnet)

### isSpam

In [11]:
# Baseline (no BPSO): all 16 feature flags are switched on
selected_features_spam = [1] * 16
selected_mask_spam = np.asarray(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam[selected_columns_spam]

In [12]:
# Stage-2 tree, trained on the selected isSpam feature columns (seed fixed at 42)
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X=X_selected_spam, y=y_spam)

### Prediction

In [14]:
# Run the two-stage stack over the Sensor 1 test rows, then write one label per line
y_pred = predictionStack(
    clf1,
    clf2,
    X_test,
    botnet_features=selected_features_botnet,
    spam_features=selected_features_spam,
)
np.savetxt(
    '../Output/Revisi 3/sensor1_nobpso_result.txt',
    y_pred,
    fmt="%s",
    delimiter="\n",
)

### Evaluation

In [15]:
# Per-class precision / recall / F1 text report, shown to five decimals
print(classification_report(y_true=y_test, y_pred=y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.99992   0.99999   0.99996   1093081
           1    0.99977   0.99767   0.99872     35241
           2    1.00000   1.00000   1.00000      1479

    accuracy                        0.99992   1129801
   macro avg    0.99990   0.99922   0.99956   1129801
weighted avg    0.99992   0.99992   0.99992   1129801



In [16]:
# Per-class metrics table: keep only the three class rows and shorten the column headers
report = classification_report(
    y_test,
    y_pred,
    target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'],
    output_dict=True,
)
df_metrics = (
    pd.DataFrame(report)
    .transpose()[['precision', 'recall', 'f1-score']]
    .drop(['accuracy', 'macro avg', 'weighted avg'])
    .rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
)
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999925,0.999993,0.999959
Botnet Non SPAM,0.999773,0.997673,0.998722
Botnet SPAM,1.0,1.0,1.0


In [17]:
# Overall accuracy of the stacked prediction against the test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999203399536732


In [18]:
# Confusion matrix of the test labels against the stacked prediction
matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(matrix)

[[1093073       8       0]
 [     82   35159       0]
 [      0       0    1479]]


## 10 Particles

### isBotnet

In [19]:
selected_features_botnet = [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [20]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [21]:
selected_features_spam = [1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [22]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [23]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor1_10_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [24]:
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1093081
           1    0.99994   1.00000   0.99997     35241
           2    1.00000   1.00000   1.00000      1479

    accuracy                        1.00000   1129801
   macro avg    0.99998   1.00000   0.99999   1129801
weighted avg    1.00000   1.00000   1.00000   1129801



In [25]:
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,0.999998,0.999999
Botnet Non SPAM,0.999943,1.0,0.999972
Botnet SPAM,1.0,1.0,1.0


In [26]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999982297767483


In [27]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1093079       2       0]
 [      0   35241       0]
 [      0       0    1479]]


## 15 Particles

### isBotnet

In [28]:
selected_features_botnet = [0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [29]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [30]:
selected_features_spam = [1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [31]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction


In [32]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor1_15_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [33]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.99947   1.00000   0.99973   1093081
           1    0.99988   0.98351   0.99163     35241
           2    1.00000   1.00000   1.00000      1479

    accuracy                        0.99948   1129801
   macro avg    0.99978   0.99450   0.99712   1129801
weighted avg    0.99948   0.99948   0.99948   1129801



In [34]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999469,0.999996,0.999732
Botnet Non SPAM,0.999885,0.983514,0.991631
Botnet SPAM,1.0,1.0,1.0


In [35]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9994822096988761


In [36]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1093077       4       0]
 [    581   34660       0]
 [      0       0    1479]]


## 20 Particles

### isBotnet

In [37]:
selected_features_botnet = [0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [38]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [39]:
selected_features_spam = [0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [40]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [41]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor1_20_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [42]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1093081
           1    0.99991   1.00000   0.99996     35241
           2    1.00000   1.00000   1.00000      1479

    accuracy                        1.00000   1129801
   macro avg    0.99997   1.00000   0.99999   1129801
weighted avg    1.00000   1.00000   1.00000   1129801



In [43]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,0.999997,0.999999
Botnet Non SPAM,0.999915,1.0,0.999957
Botnet SPAM,1.0,1.0,1.0


In [44]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999973446651225


In [45]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1093078       3       0]
 [      0   35241       0]
 [      0       0    1479]]


# Sensor 2

In [6]:
X_botnet = train2_isbotnet.drop(columns=['isBotnet'])
y_botnet = train2_isbotnet['isBotnet']

In [7]:
X_spam = train2_isspam.drop(columns=['isSpam'])
y_spam = train2_isspam['isSpam']

In [8]:
X_test = sensor2_test.drop(columns=['Label', 'isBotnet', 'isSpam'])
y_test = sensor2_test['Label']

## Without BPSO

### isBotnet

In [9]:
selected_features_botnet = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [10]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [11]:
selected_features_spam = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [12]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [14]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor2_nobpso_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [15]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.99850   0.99996   0.99923   1278136
           1    0.99934   0.97824   0.98867     84550
           2    1.00000   0.98621   0.99306      6235

    accuracy                        0.99855   1368921
   macro avg    0.99928   0.98813   0.99365   1368921
weighted avg    0.99855   0.99855   0.99855   1368921



In [16]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.998495,0.999957,0.999226
Botnet Non SPAM,0.999335,0.978238,0.988674
Botnet SPAM,1.0,0.986207,0.993056


In [17]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9985528748554519


In [18]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1278081      55       0]
 [   1840   82710       0]
 [     86       0    6149]]


## 10 Particles

### isBotnet

In [19]:
selected_features_botnet = [0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [20]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [21]:
selected_features_spam = [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [22]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [23]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor2_10_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [24]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1278136
           1    1.00000   1.00000   1.00000     84550
           2    1.00000   1.00000   1.00000      6235

    accuracy                        1.00000   1368921
   macro avg    1.00000   1.00000   1.00000   1368921
weighted avg    1.00000   1.00000   1.00000   1368921



In [25]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,1.0,1.0
Botnet Non SPAM,1.0,1.0,1.0
Botnet SPAM,1.0,1.0,1.0


In [26]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [27]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1278136       0       0]
 [      0   84550       0]
 [      0       0    6235]]


## 15 Particles

### isBotnet

In [28]:
selected_features_botnet = [0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [29]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [30]:
selected_features_spam = [0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [31]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction


In [32]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor2_15_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [33]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1278136
           1    1.00000   1.00000   1.00000     84550
           2    1.00000   1.00000   1.00000      6235

    accuracy                        1.00000   1368921
   macro avg    1.00000   1.00000   1.00000   1368921
weighted avg    1.00000   1.00000   1.00000   1368921



In [34]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,1.0,1.0
Botnet Non SPAM,1.0,1.0,1.0
Botnet SPAM,1.0,1.0,1.0


In [35]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [36]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1278136       0       0]
 [      0   84550       0]
 [      0       0    6235]]


## 20 Particles

### isBotnet

In [37]:
selected_features_botnet = [1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [38]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [39]:
selected_features_spam = [0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [40]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [41]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor2_20_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [42]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000   1278136
           1    0.99999   1.00000   0.99999     84550
           2    1.00000   1.00000   1.00000      6235

    accuracy                        1.00000   1368921
   macro avg    1.00000   1.00000   1.00000   1368921
weighted avg    1.00000   1.00000   1.00000   1368921



In [43]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,0.999999,1.0
Botnet Non SPAM,0.999988,1.0,0.999994
Botnet SPAM,1.0,1.0,1.0


In [44]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999992694976555


In [45]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[1278135       1       0]
 [      0   84550       0]
 [      0       0    6235]]


# Sensor 3

In [6]:
X_botnet = train3_isbotnet.drop(columns=['isBotnet'])
y_botnet = train3_isbotnet['isBotnet']

In [7]:
X_spam = train3_isspam.drop(columns=['isSpam'])
y_spam = train3_isspam['isSpam']

In [8]:
X_test = sensor3_test.drop(columns=['Label', 'isBotnet', 'isSpam'])
y_test = sensor3_test['Label']

## Testing without FreqEncoding

### isBotnet

In [9]:
X_botnet.head()

Unnamed: 0,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,StartTimeHour,StartTimeMinute,StartTimeSecond
0,0.002248,4,1,106,3,338987,318377,7,0,0,2,548,488,12,6,15
1,0.000298,4,492764,50,3,1211113,1220778,7,0,0,2,214,81,12,27,40
2,1.244523,3,6457,202,0,248,3257,76,0,0,53,17709,2206,11,22,43
3,3251.449219,4,1,6,3,338987,318377,7,0,0,6,1270,232,9,20,12
4,0.000255,4,440828,86,3,1211113,1220778,7,0,0,2,200,75,10,24,28


In [10]:
selected_features_botnet = [1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [11]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [12]:
selected_features_spam = [1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [13]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [14]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
# Saved under its own file name: the "Without BPSO" run further down writes
# sensor3_nobpso_result.txt, which previously overwrote this experiment's output.
np.savetxt('../Output/Revisi 3/sensor3_nofreqenc_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [15]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.94553   0.96994   0.95758    826003
           1    0.50337   0.38140   0.43398     67570
           2    0.80245   0.11342   0.19875      5766

    accuracy                        0.92023    899339
   macro avg    0.75045   0.48825   0.53010    899339
weighted avg    0.91139   0.92023   0.91337    899339



In [None]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

## Without BPSO

### isBotnet

In [13]:
selected_features_botnet = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [14]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [15]:
selected_features_spam = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [16]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [18]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor3_nobpso_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [19]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.98373   0.99713   0.99039    826003
           1    0.95882   0.80451   0.87491     67570
           2    0.99406   0.92907   0.96047      5766

    accuracy                        0.98223    899339
   macro avg    0.97887   0.91024   0.94192    899339
weighted avg    0.98193   0.98223   0.98152    899339



In [20]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.983735,0.997134,0.990389
Botnet Non SPAM,0.958815,0.804514,0.874913
Botnet SPAM,0.994062,0.929067,0.960466


In [21]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9822258347519678


In [22]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[823636   2335     32]
 [ 13209  54361      0]
 [   409      0   5357]]


## 10 Particles

### isBotnet

In [23]:
selected_features_botnet = [0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [24]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [25]:
selected_features_spam = [0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [26]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [27]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor3_10_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [28]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   1.00000   1.00000    826003
           1    1.00000   1.00000   1.00000     67570
           2    1.00000   1.00000   1.00000      5766

    accuracy                        1.00000    899339
   macro avg    1.00000   1.00000   1.00000    899339
weighted avg    1.00000   1.00000   1.00000    899339



In [29]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,1.0,1.0,1.0
Botnet Non SPAM,1.0,1.0,1.0
Botnet SPAM,1.0,1.0,1.0


In [30]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [31]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[826003      0      0]
 [     0  67570      0]
 [     0      0   5766]]


## 15 Particles

### isBotnet

In [32]:
selected_features_botnet = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [33]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [34]:
selected_features_spam = [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [35]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction


In [36]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor3_15_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [37]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.99999   1.00000   1.00000    826003
           1    1.00000   0.99993   0.99996     67570
           2    1.00000   1.00000   1.00000      5766

    accuracy                        0.99999    899339
   macro avg    1.00000   0.99998   0.99999    899339
weighted avg    0.99999   0.99999   0.99999    899339



In [38]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999994,1.0,0.999997
Botnet Non SPAM,1.0,0.999926,0.999963
Botnet SPAM,1.0,1.0,1.0


In [39]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999944403611987


In [40]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[826003      0      0]
 [     5  67565      0]
 [     0      0   5766]]


## 20 Particles

### isBotnet

In [41]:
selected_features_botnet = [1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1]
selected_mask_botnet = np.array(selected_features_botnet, dtype=bool)
selected_columns_botnet = X_botnet.columns[selected_mask_botnet]
X_selected_botnet = X_botnet.loc[:, selected_columns_botnet]

In [42]:
clf1 = DecisionTreeClassifier(random_state=42)
clf1.fit(X_selected_botnet, y_botnet)

### isSpam

In [43]:
selected_features_spam = [0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1]
selected_mask_spam = np.array(selected_features_spam, dtype=bool)
selected_columns_spam = X_spam.columns[selected_mask_spam]
X_selected_spam = X_spam.loc[:, selected_columns_spam]

In [44]:
clf2 = DecisionTreeClassifier(random_state=42)
clf2.fit(X_selected_spam, y_spam)

### Prediction

In [45]:
# Predict botnet SPAM vs non-SPAM
y_pred = predictionStack(clf1, clf2, X_test, botnet_features=selected_features_botnet, spam_features=selected_features_spam)
np.savetxt('../Output/Revisi 3/sensor3_20_result.txt', y_pred, fmt="%s", delimiter="\n")

### Evaluation

In [46]:
# Eval
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    1.00000   0.99999   1.00000    826003
           1    0.99990   0.99999   0.99994     67570
           2    1.00000   1.00000   1.00000      5766

    accuracy                        0.99999    899339
   macro avg    0.99997   0.99999   0.99998    899339
weighted avg    0.99999   0.99999   0.99999    899339



In [47]:
# Eval
report = classification_report(y_test, y_pred, target_names=['Normal', 'Botnet Non SPAM', 'Botnet SPAM'], output_dict=True)
df_metrics = pd.DataFrame(report).transpose()
df_metrics = df_metrics[['precision', 'recall', 'f1-score']].drop(['accuracy', 'macro avg', 'weighted avg'])
df_metrics = df_metrics.rename(columns={'precision': 'Pre.', 'recall': 'Rec.', 'f1-score': 'F1'})
df_metrics

Unnamed: 0,Pre.,Rec.,F1
Normal,0.999999,0.999992,0.999995
Botnet Non SPAM,0.999896,0.999985,0.999941
Botnet SPAM,1.0,1.0,1.0


In [48]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9999911045779177


In [49]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[825996      7      0]
 [     1  67569      0]
 [     0      0   5766]]
