# Notebook for Purposed Method

<img src="https://kulacino.my.id/assets/images/nb/header-notebook-its.png">

- Hartawan Bahari Mulyadi
- 6025241067
---
The code in this notebooks is part or replication process based on the referenced paper.


In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join('../..')))

ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), "../.."))
LOGS_DIR = os.path.join(ROOT_DIR, 'logs')

In [2]:
from src.helpers import io
from src.helpers import edaTools

In [3]:
import numpy as np
import pandas as pd
import math

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import seaborn as sns

import warnings
warnings.filterwarnings('ignore', category=UserWarning)

In [4]:
import random
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
# from imblearn.over_sampling import SMOTE

In [8]:
file_path = ROOT_DIR+'\\data\\raw\\WSN-DS.csv'
df = pd.read_csv(file_path)
df

Unnamed: 0,id,Time,Is_CH,who CH,Dist_To_CH,ADV_S,ADV_R,JOIN_S,JOIN_R,SCH_S,SCH_R,Rank,DATA_S,DATA_R,Data_Sent_To_BS,dist_CH_To_BS,send_code,Expaned Energy,Attack type
0,101000,50,1,101000,0.00000,1,0,0,25,1,0,0,0,1200,48,130.08535,0,2.46940,Normal
1,101001,50,0,101044,75.32345,0,4,1,0,0,1,2,38,0,0,0.00000,4,0.06957,Normal
2,101002,50,0,101010,46.95453,0,4,1,0,0,1,19,41,0,0,0.00000,3,0.06898,Normal
3,101003,50,0,101044,64.85231,0,4,1,0,0,1,16,38,0,0,0.00000,4,0.06673,Normal
4,101004,50,0,101010,4.83341,0,4,1,0,0,1,25,41,0,0,0.00000,3,0.06534,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374656,201096,1003,0,201051,6.98337,0,5,1,0,0,1,7,96,0,67,170.14779,3,0.15974,Normal
374657,201097,1003,0,201037,29.32867,0,5,1,0,0,1,31,39,0,24,82.21043,2,0.06877,Normal
374658,201098,1003,0,201095,18.51963,0,5,1,0,0,1,17,55,0,31,139.26438,1,0.09437,Normal
374659,201099,1003,0,201051,8.55001,0,5,1,0,0,1,3,96,0,65,158.27492,3,0.16047,Normal


In [6]:
wsn_ds = df.copy(deep=True)

# Preprocessing the dataset
wsn_ds['Attack_label'] = wsn_ds['Attack type'].map(lambda x: 1 if x != 'Normal' else 0)

# Clean up column names by stripping extra spaces
wsn_ds.columns = wsn_ds.columns.str.strip()

features = ['Time', 'Is_CH', 'Dist_To_CH', 'ADV_S', 'ADV_R', 'JOIN_S',
            'JOIN_R', 'SCH_S', 'SCH_R', 'DATA_S', 'DATA_R',
            'Data_Sent_To_BS', 'dist_CH_To_BS', 'send_code', 'Expaned Energy']

X = wsn_ds[features].values  # Feature matrix
y = wsn_ds['Attack_label'].values  # Target labels

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [7]:
# Ant Colony Optimization for feature selection
def aco_feature_selection(X_train, y_train, num_ants, num_iterations):
    pheromones = np.ones(X_train.shape[1])  # Initialize pheromones for each feature
    best_features = None
    best_accuracy = 0

    for iteration in range(num_iterations):
        for ant in range(num_ants):
            # Randomly select a subset of features based on pheromone levels
            selected_features = [i for i in range(X_train.shape[1]) if random.random() < pheromones[i]]

            # Ensure at least one feature is selected
            if len(selected_features) == 0:
                selected_features = [random.choice(range(X_train.shape[1]))]

            # Train classifier (SVM) on the selected features
            X_train_selected = X_train[:, selected_features]
            svm_clf = SVC(kernel='rbf')
            svm_clf.fit(X_train_selected, y_train)
            accuracy = svm_clf.score(X_train_selected, y_train)

            # Update best solution if current is better
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_features = selected_features

        # Update pheromones based on best accuracy
        for feature in best_features:
            pheromones[feature] += best_accuracy * 0.1
        pheromones = np.clip(pheromones, 0.1, 1.0)  # Keep pheromone levels within a range

    return best_features, best_accuracy

# Run ACO feature selection
best_features, best_accuracy = aco_feature_selection(X_train, y_train, num_ants=10, num_iterations=20)

# Train SVM and KNN classifiers on selected features
X_train_selected = X_train[:, best_features]
X_test_selected = X_test[:, best_features]

# SVM Classifier
svm_clf = SVC(kernel='rbf')
svm_clf.fit(X_train_selected, y_train)
svm_predictions = svm_clf.predict(X_test_selected)

# KNN Classifier
knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(X_train_selected, y_train)
knn_predictions = knn_clf.predict(X_test_selected)

# Calculate performance metrics for SVM
svm_accuracy = accuracy_score(y_test, svm_predictions)
svm_precision = precision_score(y_test, svm_predictions, zero_division=0)
svm_recall = recall_score(y_test, svm_predictions, zero_division=0)
svm_f1 = f1_score(y_test, svm_predictions, zero_division=0)

# Calculate performance metrics for KNN
knn_accuracy = accuracy_score(y_test, knn_predictions)
knn_precision = precision_score(y_test, knn_predictions, zero_division=0)
knn_recall = recall_score(y_test, knn_predictions, zero_division=0)
knn_f1 = f1_score(y_test, knn_predictions, zero_division=0)

# Preparing the results for display
performance_data = {
    'Classifier': ['SVM', 'KNN'],
    'Accuracy': [svm_accuracy, knn_accuracy],
    'Precision': [svm_precision, knn_precision],
    'Recall': [svm_recall, knn_recall],
    'F1 Score': [svm_f1, knn_f1]
}

# Convert results to DataFrame for better visualization
performance_df = pd.DataFrame(performance_data)

# Display the results
aco_results = {
    "Best Features Selected by ACO": best_features,
    "Best Accuracy During ACO Feature Selection": best_accuracy,
}

# Print the results
print(aco_results)
print("\nClassifier Performance Comparison:")
print(performance_df)
#52min 35.6s

{'Best Features Selected by ACO': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], 'Best Accuracy During ACO Feature Selection': 0.987349610757254}

Classifier Performance Comparison:
  Classifier  Accuracy  Precision    Recall  F1 Score
0        SVM  0.988493   0.991063  0.987551  0.989304
1        KNN  0.989267   0.993443  0.986594  0.990007


In [10]:
from joblib import dump, load

dump(svm_clf, ROOT_DIR+'\\models\\reference\\svm_ref_model.joblib')
dump(knn_clf, ROOT_DIR+'\\models\\reference\\knn_ref_model.joblib')
print('Model saved 💾')

# loaded_svm_model = load(ROOT_DIR+'\\models\\reference\\svm_ref_model.joblib')

Model saved 💾
