In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import pandas as pd 
import numpy as np 
from datetime import datetime

In [None]:
# Read the CSV at the notebook-relative path below into a DataFrame.
dataset_csv_path = './Dataset/Dataset.csv'
dataset = pd.read_csv(dataset_csv_path)

In [None]:
# Display the DataFrame's dimensions as (number of rows, number of columns).
dataset.shape

In [None]:
# Preview the first rows (pandas shows 5 by default) to inspect the columns.
dataset.head()

In [None]:
# Columns removed from both feature matrices.
target_columns = ['isBotnet', 'isSpam', 'Label']

# Botnet task: drop the shared columns plus 'BotnetName'; the target is 'isBotnet'.
X_botnet = dataset.drop(columns=target_columns + ['BotnetName'])
y_botnet = dataset['isBotnet']

# Spam task: drop only the shared columns; the target is 'isSpam'.
# NOTE(review): 'BotnetName' stays in X_spam although it is dropped from
# X_botnet -- confirm this asymmetry is intended.
X_spam = dataset.drop(columns=target_columns)
y_spam = dataset['isSpam']

# Binary Particle Swarm Optimization (BPSO) for feature selection

In [None]:
# BPSO

# Sigmoid function to convert velocity into probabilities
def sigmoid(v):
    """Map velocities to probabilities in [0, 1] with the logistic function.

    The function is evaluated in a numerically stable form: the exponential
    is only ever taken of a non-positive number, so large-magnitude negative
    velocities do not overflow ``np.exp`` (the naive ``1 / (1 + exp(-v))``
    emits a RuntimeWarning in that case).

    Parameters
    ----------
    v : scalar or array-like
        Velocity value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        ``1 / (1 + exp(-v))`` computed element-wise, with the shape of ``v``.
    """
    v = np.asarray(v)
    # exp(-|v|) lies in (0, 1]: it may underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(v))
    # v >= 0: 1 / (1 + e^-v);  v < 0: e^v / (1 + e^v) -- algebraically equal.
    result = np.where(v >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result so scalar input yields a scalar;
    # arrays with one or more dimensions pass through unchanged.
    return result[()]

# Function to update position (binary)
def update_position(velocity):
    """Sample a new binary position from a particle's velocity vector.

    Each velocity component is converted to a probability with ``sigmoid``.
    The corresponding bit is 1 when a uniform random draw from ``[0, 1)``
    falls below that probability and 0 otherwise.

    Returns an integer array of 0/1 values with one entry per component.
    """
    threshold = sigmoid(velocity)
    uniform_draws = np.random.rand(len(velocity))
    # Cast the boolean comparison result to integers (True -> 1, False -> 0).
    return (uniform_draws < threshold).astype(int)

# Fitness function (based on model accuracy)
def fitness_function(selected_features, X, y, random_state=42):
    """Score a feature subset by the test error of a decision tree.

    Parameters
    ----------
    selected_features : array-like of 0/1 or bool
        Feature mask with one entry per column of ``X``. Lists and tuples
        are accepted as well as NumPy arrays.
    X : pandas.DataFrame or array-like
        Feature matrix; columns are selected with the mask.
    y : array-like
        Target labels aligned with the rows of ``X``.
    random_state : int, default 42
        Seed for both the train/test split and the decision tree, so the
        same mask always receives the same fitness value.

    Returns
    -------
    float
        ``1 - accuracy`` on the 25% hold-out split (lower is better), or
        ``1.0`` when the mask selects no feature at all.
    """
    # Normalise any array-like of flags into a boolean mask.
    mask = np.asarray(selected_features).astype(bool)

    if not mask.any():
        # An empty subset cannot be trained on: report the worst error.
        return 1.0

    if isinstance(X, pd.DataFrame):
        selected_X = X.loc[:, mask]
    else:
        # Covers ndarrays and any other array-like input, which previously
        # left ``selected_X`` unbound and raised UnboundLocalError.
        selected_X = np.asarray(X)[:, mask]

    X_train, X_test, y_train, y_test = train_test_split(
        selected_X, y, test_size=0.25, random_state=random_state
    )

    # Seed the tree: ties between equally good splits are otherwise broken
    # randomly, which makes the fitness of one mask vary between calls.
    model = DecisionTreeClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, model.predict(X_test))

    # The optimiser minimises, so return the error rate instead of accuracy.
    return 1 - accuracy

# BPSO Algorithm for Feature Selection
def bpso_feature_selection(X, y, n_particles=50, max_iter=100, w=0.5, c1=1.5, c2=1.5,
                           random_state=None):
    """Select a feature subset with Binary Particle Swarm Optimization.

    Each particle is a 0/1 mask over the columns of ``X``. Its quality is
    given by ``fitness_function`` (lower is better). Velocities are updated
    from the particle's personal best and the swarm's global best, and new
    positions are sampled from them with ``update_position``.

    Parameters
    ----------
    X : pandas.DataFrame or numpy.ndarray
        Feature matrix; ``X.shape[1]`` gives the number of features.
    y : array-like
        Target labels passed through to ``fitness_function``.
    n_particles : int, default 50
        Swarm size; must be at least 1.
    max_iter : int, default 100
        Number of swarm update iterations.
    w : float, default 0.5
        Inertia weight applied to the previous velocity.
    c1, c2 : float, default 1.5
        Weights of the pull towards the personal best and the global best.
    random_state : int or None, default None
        When given, seeds NumPy's global random generator before the swarm
        is initialised. ``None`` leaves the global generator untouched.

    Returns
    -------
    (numpy.ndarray, float)
        The best 0/1 feature mask found and ``1 - best_fitness`` (the
        accuracy corresponding to that mask).

    Raises
    ------
    ValueError
        If ``n_particles`` is smaller than 1.
    """
    if n_particles < 1:
        raise ValueError("n_particles must be at least 1")
    if random_state is not None:
        np.random.seed(random_state)

    n_features = X.shape[1]

    # Particles frequently revisit the same mask; remember each mask's
    # fitness so the model is trained only once per distinct subset.
    fitness_cache = {}

    def evaluate(mask):
        key = mask.tobytes()
        if key not in fitness_cache:
            fitness_cache[key] = fitness_function(mask, X, y)
        return fitness_cache[key]

    # Initialize particle positions (binary) and velocities
    positions = np.random.randint(2, size=(n_particles, n_features))
    velocities = np.random.uniform(-1, 1, (n_particles, n_features))

    # Initialize personal best positions and global best position
    pbest_positions = positions.copy()
    pbest_fitness = np.array([evaluate(positions[i]) for i in range(n_particles)])

    best_particle_idx = np.argmin(pbest_fitness)
    # Copy the row: integer indexing returns a view, which later in-place
    # personal-best updates would silently overwrite.
    gbest_position = pbest_positions[best_particle_idx].copy()
    gbest_fitness = pbest_fitness[best_particle_idx]

    # BPSO main loop
    for iteration in range(max_iter):
        for i in range(n_particles):
            # Velocity = inertia + cognitive pull + social pull
            r1, r2 = np.random.rand(n_features), np.random.rand(n_features)
            velocities[i] = (w * velocities[i] +
                             c1 * r1 * (pbest_positions[i] - positions[i]) +
                             c2 * r2 * (gbest_position - positions[i]))

            # Sample the new binary position from the updated velocity
            positions[i] = update_position(velocities[i])

            fitness = evaluate(positions[i])

            # Keep the new position only if it strictly improves on the
            # particle's personal best
            if fitness < pbest_fitness[i]:
                pbest_positions[i] = positions[i]
                pbest_fitness[i] = fitness

        # Update the global best once per iteration, after all particles moved
        best_particle_idx = np.argmin(pbest_fitness)
        if pbest_fitness[best_particle_idx] < gbest_fitness:
            gbest_position = pbest_positions[best_particle_idx].copy()
            gbest_fitness = pbest_fitness[best_particle_idx]

        print(f"Iteration {iteration+1}/{max_iter}, Best Fitness: {gbest_fitness}")

    # Fitness is 1 - accuracy, so convert it back to accuracy for the caller
    return gbest_position, 1 - gbest_fitness

In [None]:
# DT-1
# Run BPSO feature selection on the botnet features and target.
best_features, best_accuracy = bpso_feature_selection(X=X_botnet, y=y_botnet)