In [1]:
import sys
import os
# Project root: two directory levels above the notebook's working directory.
ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
# Log files live in a `logs` folder directly under the project root.
LOGS_DIR = os.path.join(ROOT_DIR, 'logs')

# Put the project root on the import path so the `src.*` imports below resolve.
sys.path.append(ROOT_DIR)

In [2]:
from src.helpers import io
from src.helpers import edaTools

In [3]:
import numpy as np
import pandas as pd
import math

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import seaborn as sns

import warnings
warnings.filterwarnings('ignore', category=UserWarning)

In [4]:
import random
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
# from imblearn.over_sampling import SMOTE

In [5]:
# Build the dataset path with os.path.join so the separators are correct on
# every platform (hard-coded backslashes only resolve on Windows).
file_path = os.path.join(ROOT_DIR, 'data', 'processed', 'WSN-DS_balance.csv')
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,id,Time,Is_CH,who CH,Dist_To_CH,ADV_S,ADV_R,JOIN_S,JOIN_R,SCH_S,SCH_R,Rank,DATA_S,DATA_R,Data_Sent_To_BS,dist_CH_To_BS,send_code,Expaned Energy,Attack type
0,606079,3053,1,606100,0.0,1,27,0,0,0,0,0,0,0,0,0.0,0,0.04985,Grayhole
1,406039,1953,1,406100,0.0,1,22,0,0,0,0,0,0,0,0,0.0,0,0.39332,Blackhole
2,303085,1653,0,303006,2.96837,0,13,1,0,0,1,3,0,0,0,0.0,8,0.00594,Normal
3,402086,2003,1,402100,0.0,1,25,0,0,0,0,0,0,0,0,0.0,0,0.0502,Grayhole
4,603098,2753,1,603100,0.0,1,10,0,1,1,0,0,0,234,234,126.74493,0,2.48271,TDMA


In [6]:
# Work on a deep copy so the loaded frame `df` stays untouched.
wsn_ds = df.copy(deep=True)

# Clean up column names by stripping extra spaces. This has to happen before
# any column is accessed by name, otherwise a padded header would make the
# lookups below fail with a KeyError.
wsn_ds.columns = wsn_ds.columns.str.strip()

# Preprocessing the dataset: binary target, 1 for every row whose
# 'Attack type' is not 'Normal', 0 otherwise.
wsn_ds['Attack_label'] = (wsn_ds['Attack type'] != 'Normal').astype(int)

# Columns used as model inputs ('Expaned Energy' is spelled as in the dataset).
features = ['Time', 'Is_CH', 'Dist_To_CH', 'ADV_S', 'ADV_R', 'JOIN_S',
            'JOIN_R', 'SCH_S', 'SCH_R', 'DATA_S', 'DATA_R',
            'Data_Sent_To_BS', 'dist_CH_To_BS', 'send_code', 'Expaned Energy']

X = wsn_ds[features].values  # Feature matrix
y = wsn_ds['Attack_label'].values  # Target labels

# Splitting the dataset into training and testing sets (70% / 30%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalize the features: the scaler is fitted on the training split only and
# then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [7]:
# Ant Colony Optimization for feature selection with 2-opt
def aco_feature_selection_2opt(X_train, y_train, num_ants, num_iterations,
                               evaporation_rate=0.1, initial_pheromone=0.5, seed=None):
    """Search for a feature subset with an ant-colony heuristic refined by ``two_opt``.

    Every ant includes feature ``i`` with probability ``pheromones[i]``, fits an
    RBF-kernel ``SVC`` on the chosen columns, scores it on the same training
    data and hands the subset to ``two_opt`` for local refinement. After each
    iteration the pheromone trail evaporates and the features of the best
    subset found so far are reinforced.

    Parameters
    ----------
    X_train : 2-D array
        Training feature matrix; columns are the candidate features.
    y_train : 1-D array
        Training labels.
    num_ants : int
        Number of subsets sampled per iteration.
    num_iterations : int
        Number of pheromone-update rounds.
    evaporation_rate : float, default 0.1
        Fraction of pheromone removed from every feature before the best
        subset is reinforced. Without evaporation the trail can only grow and
        the search stops exploring.
    initial_pheromone : float, default 0.5
        Starting selection probability of each feature. It must be below 1.0
        for the sampling to be random at all: ``random() < 1.0`` is always
        true, so a start value of 1.0 makes every ant pick every feature.
    seed : int or None, default None
        Seed for a private random generator. ``None`` keeps using the global
        ``random`` module state.

    Returns
    -------
    (best_features, best_accuracy)
        Column indices of the best subset and its training accuracy.
        ``(None, 0)`` when no ant was ever evaluated.

    Notes
    -----
    NOTE(review): fitness is the accuracy on the training data itself, which
    tends to favour larger subsets; consider scoring on a held-out split.
    """
    n_features = X_train.shape[1]
    # Use a dedicated generator only when a seed is requested, so callers that
    # seeded the global `random` module keep their existing behaviour.
    rng = random if seed is None else random.Random(seed)

    pheromones = np.full(n_features, float(initial_pheromone))  # Selection probability per feature
    best_features = None
    best_accuracy = 0

    for iteration in range(num_iterations):
        for _ in range(num_ants):
            # Randomly select a subset of features based on pheromone levels
            selected_features = [i for i in range(n_features) if rng.random() < pheromones[i]]

            # Ensure at least one feature is selected
            if not selected_features:
                selected_features = [rng.choice(range(n_features))]

            # Train classifier (SVM) on the selected features
            X_train_selected = X_train[:, selected_features]
            svm_clf = SVC(kernel='rbf')
            svm_clf.fit(X_train_selected, y_train)
            accuracy = svm_clf.score(X_train_selected, y_train)

            # Apply 2-opt to improve the selected features
            improved_features, improved_accuracy = two_opt(selected_features, X_train, y_train, accuracy, svm_clf)

            # Update best solution if current is better
            if improved_accuracy > best_accuracy:
                best_accuracy = improved_accuracy
                best_features = improved_features

        # Update pheromones: evaporate everywhere, then reinforce the best subset.
        # Skipped while no subset has been recorded (e.g. num_ants == 0), which
        # previously raised a TypeError when iterating over None.
        if best_features is not None:
            pheromones *= (1.0 - evaporation_rate)
            pheromones[best_features] += best_accuracy * 0.1
            pheromones = np.clip(pheromones, 0.1, 1.0)  # Keep pheromone levels within a range

        print(f"Iteration {iteration+1}/{num_iterations} - Best Score: {best_accuracy:.4f}")

    return best_features, best_accuracy

# 2-opt algorithm to improve feature selection
def two_opt(selected_features, X_train, y_train, current_accuracy, svm_clf):
    """Locally refine a feature subset by exchanging selected and unselected features.

    For every position in the subset, each feature that is not currently
    selected is tried in its place; the exchange is kept whenever the
    classifier's training accuracy strictly improves. The subset size never
    changes.

    The earlier version swapped two positions *inside* the subset, which only
    reorders the columns of the same feature set, so it could not find a better
    subset and spent one classifier fit per pair for nothing.

    Parameters
    ----------
    selected_features : list of int
        Column indices of the starting subset. Not modified.
    X_train : 2-D array
        Training feature matrix; its column count defines the candidate pool.
    y_train : 1-D array
        Training labels.
    current_accuracy : float
        Score of ``selected_features``; a trial must beat it to be accepted.
    svm_clf : estimator with ``fit(X, y)`` and ``score(X, y)``
        Refitted in place for every trial, so on return it is fitted on the
        last trial evaluated, not necessarily on the returned subset.

    Returns
    -------
    (features, accuracy)
        The improved subset and its score, or the original
        ``selected_features`` and ``current_accuracy`` if nothing was better.
    """
    n_features = X_train.shape[1]
    best_features = list(selected_features)
    best_accuracy = current_accuracy
    improved = False

    for position in range(len(best_features)):
        for candidate in range(n_features):
            # Only bring in features that are not already part of the subset
            if candidate in best_features:
                continue

            # Exchange the feature at `position` for the unselected candidate
            trial_features = best_features.copy()
            trial_features[position] = candidate

            # Train classifier on the trial subset
            X_train_trial = X_train[:, trial_features]
            svm_clf.fit(X_train_trial, y_train)
            trial_accuracy = svm_clf.score(X_train_trial, y_train)

            # If the trial accuracy is better, keep the exchange
            if trial_accuracy > best_accuracy:
                best_accuracy = trial_accuracy
                best_features = trial_features
                improved = True

    if improved:
        return best_features, best_accuracy
    return selected_features, current_accuracy

# Run the ACO + 2-opt search on the training split prepared in the
# preprocessing cell (X_train is already standardized there).
best_features, best_accuracy = aco_feature_selection_2opt(
    X_train, y_train, num_ants=10, num_iterations=20
)

# Keep only the selected columns in both splits
X_train_selected = X_train[:, best_features]
X_test_selected = X_test[:, best_features]

# Fit the two reference classifiers and predict on the held-out split
svm_clf = SVC(kernel='rbf')
svm_clf.fit(X_train_selected, y_train)
svm_predictions = svm_clf.predict(X_test_selected)

knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(X_train_selected, y_train)
knn_predictions = knn_clf.predict(X_test_selected)


def _binary_scores(y_true, y_pred):
    """Return (accuracy, precision, recall, f1); undefined ratios count as 0."""
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, zero_division=0),
        recall_score(y_true, y_pred, zero_division=0),
        f1_score(y_true, y_pred, zero_division=0),
    )


# Test-set metrics for each classifier
svm_accuracy, svm_precision, svm_recall, svm_f1 = _binary_scores(y_test, svm_predictions)
knn_accuracy, knn_precision, knn_recall, knn_f1 = _binary_scores(y_test, knn_predictions)

# One column per metric, one row per classifier
performance_data = {'Classifier': ['SVM', 'KNN']}
for metric_name, svm_value, knn_value in (
    ('Accuracy', svm_accuracy, knn_accuracy),
    ('Precision', svm_precision, knn_precision),
    ('Recall', svm_recall, knn_recall),
    ('F1 Score', svm_f1, knn_f1),
):
    performance_data[metric_name] = [svm_value, knn_value]

performance_df = pd.DataFrame(performance_data)

# Summary of the feature-selection run itself
aco_results = {
    "Best Features Selected by ACO + 2-opt": best_features,
    "Best Accuracy During ACO + 2-opt Feature Selection": best_accuracy,
}

# Report both the search outcome and the classifier comparison
print(aco_results)
print("\nClassifier Performance Comparison:")
print(performance_df)
# Runtime note from the last attempt: 243min 58s (not done yet)

KeyboardInterrupt: 

In [10]:
from joblib import dump, load

# Build the target directory with os.path.join so the path is valid on every
# platform (hard-coded backslashes only resolve on Windows), and create it if
# it is missing so dump() does not fail on a fresh checkout.
models_dir = os.path.join(ROOT_DIR, 'models', 'reference')
os.makedirs(models_dir, exist_ok=True)

# Persist the fitted reference classifiers
dump(svm_clf, os.path.join(models_dir, 'svm_ref_model.joblib'))
dump(knn_clf, os.path.join(models_dir, 'knn_ref_model.joblib'))
print('Model saved 💾')

# loaded_svm_model = load(os.path.join(models_dir, 'svm_ref_model.joblib'))

Model saved 💾
