In [1]:
import sys
import os
# Project root: two directory levels above the current working directory.
ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), "../.."))
LOGS_DIR = os.path.join(ROOT_DIR, 'logs')

# Put the project root on the import path so the `src.*` packages resolve.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

In [2]:
from src.helpers import io
from src.helpers import edaTools

In [3]:
import numpy as np
import pandas as pd
import math

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import seaborn as sns

import warnings
# Silence every warning of category UserWarning (this installs a process-wide
# filter, so it also hides UserWarnings raised by any library used below).
warnings.filterwarnings('ignore', category=UserWarning)

In [4]:
import random
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
# from imblearn.over_sampling import SMOTE

In [5]:
# Build the path with os.path.join instead of hard-coded '\\' separators so
# the file is found on any OS, not only on Windows.
file_path = os.path.join(ROOT_DIR, 'data', 'processed', 'WSN-DS_balance.csv')
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,id,Time,Is_CH,who CH,Dist_To_CH,ADV_S,ADV_R,JOIN_S,JOIN_R,SCH_S,SCH_R,Rank,DATA_S,DATA_R,Data_Sent_To_BS,dist_CH_To_BS,send_code,Expaned Energy,Attack type
0,606079,3053,1,606100,0.0,1,27,0,0,0,0,0,0,0,0,0.0,0,0.04985,Grayhole
1,406039,1953,1,406100,0.0,1,22,0,0,0,0,0,0,0,0,0.0,0,0.39332,Blackhole
2,303085,1653,0,303006,2.96837,0,13,1,0,0,1,3,0,0,0,0.0,8,0.00594,Normal
3,402086,2003,1,402100,0.0,1,25,0,0,0,0,0,0,0,0,0.0,0,0.0502,Grayhole
4,603098,2753,1,603100,0.0,1,10,0,1,1,0,0,0,234,234,126.74493,0,2.48271,TDMA


In [6]:
# Work on a deep copy so the frame loaded from disk (`df`) stays untouched.
wsn_ds = df.copy(deep=True)

# Clean up column names FIRST: every lookup below uses the stripped spelling,
# so stripping after the 'Attack type' access (as before) could not rescue a
# header that carries leading/trailing spaces - the lookup would already fail.
wsn_ds.columns = wsn_ds.columns.str.strip()

# Binary target: 0 for rows labelled 'Normal', 1 for every other attack type.
wsn_ds['Attack_label'] = wsn_ds['Attack type'].map(lambda x: 1 if x != 'Normal' else 0)

# Columns fed to the models ('Expaned Energy' is spelled as in the CSV header).
features = ['Time', 'Is_CH', 'Dist_To_CH', 'ADV_S', 'ADV_R', 'JOIN_S',
            'JOIN_R', 'SCH_S', 'SCH_R', 'DATA_S', 'DATA_R',
            'Data_Sent_To_BS', 'dist_CH_To_BS', 'send_code', 'Expaned Energy']

X = wsn_ds[features].values  # Feature matrix
y = wsn_ds['Attack_label'].values  # Target labels

# Hold out 30% of the rows for testing; fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features. The scaler is fitted on the training split only and
# then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [8]:
from joblib import dump, load
from sklearn.feature_selection import mutual_info_classif

In [9]:
def iaco_feature_selection(X, y, num_ants=10, num_iterations=20, alpha=1.0, beta=2.0,
                           evaporation_rate=0.5, random_state=None):
    """Search for a feature subset with an ant-colony-style loop scored by an RBF SVM.

    In every iteration each ant includes feature ``f`` independently when a
    uniform draw falls below ``pheromones[f] ** alpha * heuristic[f] ** beta``,
    where the heuristic is the mutual information between the feature and ``y``.
    The product is used directly as the inclusion probability (it is not
    normalized across features). The subset is scored by fitting an RBF
    ``SVC`` on ``X[:, subset]`` and measuring accuracy on that same data, so
    the returned score is a *training* accuracy, not a held-out estimate.
    After each iteration pheromones evaporate, the iteration's best subset is
    reinforced by its score, and the trail is rescaled so its maximum is 1.

    Parameters
    ----------
    X : 2-D numpy array; columns are the candidate features.
    y : 1-D array of class labels aligned with the rows of ``X``.
    num_ants : int, number of subsets sampled per iteration (must be >= 1).
    num_iterations : int, number of pheromone-update rounds.
    alpha : float, exponent applied to the pheromone level.
    beta : float, exponent applied to the mutual-information heuristic.
    evaporation_rate : float, fraction of pheromone removed each iteration.
    random_state : int or None, optional. When given, seeds both the subset
        sampling and ``mutual_info_classif`` so a run can be repeated exactly.
        When ``None`` (default) sampling draws from the module-level ``random``
        state, as it always did.

    Returns
    -------
    (best_features, best_score) : list of selected column indices and the
        highest training accuracy seen. ``([], 0)`` when ``num_iterations`` is 0.

    Raises
    ------
    ValueError : if ``num_ants`` is smaller than 1.
    """
    if num_ants < 1:
        # Previously this surfaced later as an opaque "max() arg is an empty sequence".
        raise ValueError("num_ants must be at least 1")

    num_features = X.shape[1]
    pheromones = np.ones(num_features)  # Initialize pheromones for each feature
    best_features = []
    best_score = 0

    # Private generator when a seed is supplied; otherwise keep using the
    # global `random` module so existing callers see unchanged behavior.
    rng = random.Random(random_state) if random_state is not None else random

    # Calculate heuristic information (mutual information with the target)
    heuristic_info = mutual_info_classif(X, y, random_state=random_state)

    # Subset -> score memo. Fitting the SVM dominates the runtime and the
    # result for a given subset never changes (same data, same estimator
    # settings), so a subset that is sampled again is not refitted.
    score_cache = {}

    for iteration in range(num_iterations):
        all_feature_subsets = []
        all_scores = []

        # Pheromones only change between iterations, so the inclusion
        # probabilities are the same for every ant in this round.
        inclusion_prob = (pheromones ** alpha) * (heuristic_info ** beta)

        for ant in range(num_ants):
            # One uniform draw per feature, in column order.
            selected_features = [
                feature for feature in range(num_features)
                if rng.random() < inclusion_prob[feature]
            ]

            # Ensure at least one feature is selected
            if len(selected_features) == 0:
                selected_features = [rng.randint(0, num_features - 1)]

            subset_key = tuple(selected_features)
            score = score_cache.get(subset_key)
            if score is None:
                # Train the classifier on the selected columns and score it
                # on the same data (training accuracy).
                X_subset = X[:, selected_features]
                svm_clf = SVC(kernel='rbf', random_state=42)
                svm_clf.fit(X_subset, y)
                score = svm_clf.score(X_subset, y)
                score_cache[subset_key] = score

            all_feature_subsets.append(selected_features)
            all_scores.append(score)

        # Find the best subset in this iteration (first one on ties)
        iteration_best_score = max(all_scores)
        iteration_best_features = all_feature_subsets[all_scores.index(iteration_best_score)]

        # Strict '>' keeps the earliest subset that reached the top score.
        if iteration_best_score > best_score:
            best_score = iteration_best_score
            best_features = iteration_best_features

        # Update pheromones
        pheromones *= (1 - evaporation_rate)             # Evaporation
        for feature in iteration_best_features:
            pheromones[feature] += iteration_best_score  # Deposit pheromones based on performance

        # Rescale so the strongest trail is 1 (keeps values bounded)
        pheromones = pheromones / np.max(pheromones)

        print(f"Iteration {iteration+1}/{num_iterations} - Best Score: {best_score:.4f}")

    return best_features, best_score

# Search for a feature subset on the standardized training split
best_features, best_score = iaco_feature_selection(
    X_train, y_train, num_ants=10, num_iterations=20
)

print(f"\nSelected Features by EACO: {best_features}")
print(f"Best Training Accuracy by EACO: {best_score:.4f}")

# Keep only the chosen columns in both splits
X_train_selected, X_test_selected = X_train[:, best_features], X_test[:, best_features]

# RBF-kernel SVM fitted on the reduced training data
svm_clf = SVC(kernel='rbf', random_state=42).fit(X_train_selected, y_train)
svm_predictions = svm_clf.predict(X_test_selected)

# 3-nearest-neighbours classifier fitted on the same reduced data
knn_clf = KNeighborsClassifier(n_neighbors=3).fit(X_train_selected, y_train)
knn_predictions = knn_clf.predict(X_test_selected)

# Test-set metrics; precision/recall/F1 fall back to 0 on an undefined ratio
_binary_metrics = (precision_score, recall_score, f1_score)

svm_accuracy = accuracy_score(y_test, svm_predictions)
svm_precision, svm_recall, svm_f1 = (
    metric(y_test, svm_predictions, zero_division=0) for metric in _binary_metrics
)

knn_accuracy = accuracy_score(y_test, knn_predictions)
knn_precision, knn_recall, knn_f1 = (
    metric(y_test, knn_predictions, zero_division=0) for metric in _binary_metrics
)

# One row per classifier, one column per metric
performance_data = dict([
    ('Classifier', ['SVM', 'KNN']),
    ('Accuracy', [svm_accuracy, knn_accuracy]),
    ('Precision', [svm_precision, knn_precision]),
    ('Recall', [svm_recall, knn_recall]),
    ('F1 Score', [svm_f1, knn_f1]),
])
performance_df = pd.DataFrame(performance_data)

# Show the side-by-side comparison
print("\nClassifier Performance Comparison:")
print(performance_df)
# 190min 4.3s  (timing noted by the original author, presumably for this cell)


Iteration 1/20 - Best Score: 0.9795
Iteration 2/20 - Best Score: 0.9795
Iteration 3/20 - Best Score: 0.9795
Iteration 4/20 - Best Score: 0.9795
Iteration 5/20 - Best Score: 0.9795
Iteration 6/20 - Best Score: 0.9795
Iteration 7/20 - Best Score: 0.9795
Iteration 8/20 - Best Score: 0.9795
Iteration 9/20 - Best Score: 0.9795
Iteration 10/20 - Best Score: 0.9795
Iteration 11/20 - Best Score: 0.9795
Iteration 12/20 - Best Score: 0.9795
Iteration 13/20 - Best Score: 0.9795
Iteration 14/20 - Best Score: 0.9795
Iteration 15/20 - Best Score: 0.9795
Iteration 16/20 - Best Score: 0.9795
Iteration 17/20 - Best Score: 0.9795
Iteration 18/20 - Best Score: 0.9795
Iteration 19/20 - Best Score: 0.9795
Iteration 20/20 - Best Score: 0.9795

Selected Features by EACO: [1, 9]
Best Training Accuracy by EACO: 0.9795

Classifier Performance Comparison:
  Classifier  Accuracy  Precision    Recall  F1 Score
0        SVM  0.980856   0.976082  0.988701  0.982351
1        KNN  0.980856   0.976082  0.988701  0.9823

In [10]:
# Persist both fitted classifiers. The directory is built with os.path.join
# (the old hard-coded '\\' separators only resolved on Windows) and created on
# demand so a fresh checkout does not fail with FileNotFoundError.
final_models_dir = os.path.join(ROOT_DIR, 'models', 'final')
os.makedirs(final_models_dir, exist_ok=True)

dump(svm_clf, os.path.join(final_models_dir, 'svm_model.joblib'))
dump(knn_clf, os.path.join(final_models_dir, 'knn_model.joblib'))
print('Model saved 💾')

# loaded_svm_model = load(os.path.join(ROOT_DIR, 'models', 'reference', 'svm_ref_model.joblib'))

Model saved 💾
