In [1]:
"""
Monarch Mayfly Optimization (MMO) Algorithm for Feature Selection
=================================================================
This implementation of the Monarch Mayfly Optimization (MMO) algorithm
is a hybrid of the Monarch Butterfly Optimization (MBO) and the Mayfly Algorithm (MA).
Sources and Acknowledgements:
-----------------------------
1. Monarch Butterfly Optimization (MBO):
- Original implementation in Python by Justin van Zyl.
- Based on the study:
Wang G., Deb S., Cui Z., "Monarch Butterfly Optimization," Neural Comput & Applic 31:1995-2014.
doi: 10.1007/s00521-015-1923-y.
- Key operations utilized: Migration Operator, Adjusting Operator, and Elitism.
2. Mayfly Algorithm (MA):
- Extracted from the hybrid feature selection study:
Bhattacharyya, T., Chatterjee, B., Singh, P. K., Yoon, J. H., Geem, Z. W., & Sarkar, R. (2020).
"Mayfly in harmony: A new hybrid meta-heuristic feature selection algorithm," IEEE Access, 8, 195929-195945.
- The study hybridized MA with Harmony Search (HS). The MA part is used in this MMO hybrid.
Disclaimer:
-----------
This code is a hybrid implementation of the aforementioned algorithms and combines elements from both
to create the MMO algorithm for the purpose of feature selection. Full credit goes to the original authors
for their contributions.
"""



In [2]:
import numpy as np
import pandas as pd
import math
import random
from time import process_time
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [3]:
# Reference CPU-time reading taken before any work is done. process_time()
# counts CPU time of this process, not wall-clock time.
# NOTE(review): presumably subtracted from a later reading to report the run's
# cost; that code is not in the cells visible here.
start_time = process_time()

In [6]:
# Load dataset
# The path uses a forward slash so it resolves on Windows and POSIX alike and
# contains no backslash escape sequence.
df = pd.read_csv('datasets/Breastcancer.csv')

# The last column is the class label; every preceding column is a feature.
tot_features = len(df.columns) - 1
x = df[df.columns[:tot_features]]
y = df[df.columns[-1]]

# Stratified 80/20 split keeps the class proportions equal in both parts.
# NOTE(review): no random_state is passed, so the split (and every accuracy
# derived from it) differs between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y)

In [7]:
# Baseline: a 5-nearest-neighbour classifier fitted on the full feature set,
# giving the accuracy/error that feature subsets can be compared against.
_classifier = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)
predictions = _classifier.predict(x_test)

total_features = tot_features  # denominator of the feature-ratio term in find_fitness
total_acc = accuracy_score(y_test, predictions)
total_error = 1 - total_acc

total_acc  # last expression of the cell, so the baseline accuracy is displayed

0.6571428571428571

In [8]:
# Controlling parameters
# NOTE(review): in the cells visible here only `swarm_size` (population sizing)
# and `alpha` (fitness weighting inside find_fitness) are read as globals. The
# roles suggested for the remaining values are inferred from their names and
# must be confirmed against the cells that actually use them.
swarm_size = 20  # halved to obtain the size of each male/female group
max_iterations = 20  # presumably the number of optimisation iterations — TODO confirm
alpha = 0.01  # weight of the selected-feature ratio in the fitness; (1 - alpha) weights the error
# a1 .. delta: presumably Mayfly Algorithm coefficients — TODO confirm
a1 = 3
a2 = 3.5
beta = 0.1
d = 3
fl = 3
l = 0.95
g = 1
delta = 0.9
lf_size = 1  # presumably the `size` argument given to levy_flight — TODO confirm
adjusting_rate = 0.1  # presumably the MBO adjusting-operator rate — TODO confirm
p = float(6/12)  # evaluates to 0.5; presumably passed as `p` to migration_operator — TODO confirm
s_max = 0.02  # presumably a maximum step size — TODO confirm
# gmax / gmin: presumably upper and lower bounds for `g` — TODO confirm
gmax=9.8
gmin=6
max_neighbors = 20  # not read by any visible cell (the KNN calls use n_neighbors=5) — TODO confirm use

In [11]:
# Population structure and initialisation
# Four groups exist (NP1/NP2 x male/female); each holds `subpop_size` rows of
# `tot_features` values.
subpop_size = swarm_size // 2

# Velocities: every individual starts with a zero velocity vector.
(NP1_male_swarm_vel,
 NP1_female_swarm_vel,
 NP2_male_swarm_vel,
 NP2_female_swarm_vel) = (np.zeros((subpop_size, tot_features)) for _ in range(4))

# Positions: uniform draws from [-1, 1), generated in the order the names are
# listed (NP1 male, NP1 female, NP2 male, NP2 female).
(NP1_male_swarm_pos,
 NP1_female_swarm_pos,
 NP2_male_swarm_pos,
 NP2_female_swarm_pos) = (
    np.random.uniform(-1, 1, (subpop_size, tot_features)) for _ in range(4)
)

# Best-so-far bookkeeping: fitness is minimised, so the trackers start high.
gbest_fitness = 1000000
gbest = np.zeros(tot_features)
pbest_fitness = np.full(swarm_size, np.inf)
pbest = np.zeros((swarm_size, tot_features))

# Per-group fitness buffers; contents are uninitialised until first written.
(NP1_male_fitness,
 NP1_female_fitness,
 NP2_male_fitness,
 NP2_female_fitness) = (np.empty(subpop_size) for _ in range(4))

# Per-dimension velocity limits; uninitialised until update_vmax fills them.
(NP1_vmax_male,
 NP1_vmax_female,
 NP2_vmax_male,
 NP2_vmax_female) = (np.empty(tot_features) for _ in range(4))

In [13]:
# Transfer function applied to a particle before thresholding
def transfer_func(velocity):
    """Map a value (or array of values) through 0.5 - v / (1 + 0.5 * |v|).

    The mapping is element-wise and decreasing in ``velocity``: 0 maps to
    0.5, large positive inputs approach -1.5 and large negative inputs
    approach 2.5.

    NOTE(review): the result is therefore not confined to [0, 1];
    find_fitness compares it against a fixed 0.25 threshold.
    """
    damping = 1 + 0.5 * np.abs(velocity)
    return 0.5 + (-velocity) / damping

In [14]:
# Fitness function (lower is better)
def find_fitness(particle):
    """Score the feature subset encoded by ``particle``.

    Position i of ``particle`` refers to ``df.columns[i]``; a column is
    selected when the transfer-function value at that position is at least
    0.25. A 5-nearest-neighbour classifier is fitted on the selected columns
    of ``x_train`` and scored on ``x_test``.

    Returns
    -------
    ``alpha * (selected / total_features) + (1 - alpha) * error``, or the
    fixed penalty 10000 when no column is selected.

    NOTE(review): the error is measured on the x_test / y_test split, so the
    search is guided by the same data used for the baseline accuracy.
    """
    chosen = []
    for idx, score in enumerate(transfer_func(particle)):
        if score >= 0.25:
            chosen.append(df.columns[idx])

    # An empty subset cannot be classified; return a large penalty instead.
    if len(chosen) == 0:
        return 10000

    train_subset = x_train[chosen].copy()
    test_subset = x_test[chosen].copy()

    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(train_subset, y_train)
    predicted = knn.predict(test_subset)

    error_rate = 1 - accuracy_score(y_true=y_test, y_pred=predicted)
    size_ratio = len(chosen) / total_features
    return alpha * size_ratio + (1 - alpha) * error_rate

In [16]:
# Levy Flight function
def levy_flight(size):
    """Return the sum of ``size`` heavy-tailed random steps.

    Each step is tan(pi * u) with u drawn uniformly from [0, 1); the steps
    are generated as a (1, size) array and summed into a single scalar.
    """
    uniform_draws = np.random.uniform(low=0, high=1, size=(1, size))
    tangent_steps = np.tan(math.pi * uniform_draws)
    return np.sum(tangent_steps)

In [18]:
def migration_operator(migrant_male_swarm_pos, female_swarm_pos, male_swarm_pos, peri, p,
                       fitness_func=None):
    """Apply the migration operator to every individual of a group, in place.

    Each row of ``migrant_male_swarm_pos`` is treated as a complete position
    vector (one value per feature, no embedded fitness slot), matching how the
    population arrays are created and how sort_population evaluates them.

    For every individual a candidate is assembled dimension by dimension: a
    uniform draw from [0, 1) is multiplied by ``peri``; when the product is at
    most ``p`` the value for that dimension is copied from a randomly chosen
    row of ``female_swarm_pos``, otherwise from a randomly chosen row of
    ``male_swarm_pos``. The candidate replaces the individual only when its
    fitness is strictly lower (better) than the individual's current fitness,
    so a migration never worsens an individual.

    Parameters
    ----------
    migrant_male_swarm_pos : 2-D numpy array
        Positions to migrate; accepted candidates are written back into it.
    female_swarm_pos, male_swarm_pos : 2-D array-like
        Donor groups whose rows supply the per-dimension values.
    peri : float
        Scale applied to the uniform draw (presumably MBO's migration period).
    p : float
        Threshold deciding which donor group supplies a dimension.
    fitness_func : callable, optional
        Maps a position vector to a fitness value (lower is better).
        Defaults to the notebook's ``find_fitness``.

    Returns
    -------
    The same ``migrant_male_swarm_pos`` object, after in-place updates.
    """
    evaluate = find_fitness if fitness_func is None else fitness_func

    n_migrants = len(migrant_male_swarm_pos)
    if n_migrants == 0:
        return migrant_male_swarm_pos  # nothing to migrate
    n_dims = len(migrant_male_swarm_pos[0])

    for i in range(n_migrants):
        # Fitness is evaluated on the whole row so that position k keeps
        # referring to feature k.
        current_fitness = evaluate(migrant_male_swarm_pos[i])

        # Work on a copy so a rejected candidate leaves the individual intact.
        candidate = np.array(migrant_male_swarm_pos[i], copy=True)
        for k in range(n_dims):
            r = np.random.uniform(low=0, high=1) * peri
            donors = female_swarm_pos if r <= p else male_swarm_pos
            donor = donors[np.random.randint(0, len(donors))]
            candidate[k] = donor[k]

        # Greedy acceptance: keep the migrated position only if it is better.
        if evaluate(candidate) < current_fitness:
            migrant_male_swarm_pos[i] = candidate

    return migrant_male_swarm_pos

In [19]:
def update_vmax(swarm_pos_male, swarm_pos_female, vmax_male, vmax_female, subpop_size):
    """Recompute the per-dimension velocity limits of both groups in place.

    For each dimension one standard-normal factor is drawn and shared by both
    groups. A group's limit in that dimension becomes the factor times the
    difference between its first row and its row at index
    ``min(subpop_size - 1, len(group) - 1)``.

    Nothing is returned; ``vmax_male`` and ``vmax_female`` are overwritten.

    NOTE(review): because the factor is normally distributed the resulting
    limits can be negative; presumably the rows are sorted best-first by the
    caller — confirm both against the code that consumes these limits.
    """
    n_dims = len(vmax_male)
    if n_dims == 0:
        return  # no dimensions to update

    # The reference row index does not depend on the dimension.
    last_male = min(subpop_size - 1, len(swarm_pos_male) - 1)
    last_female = min(subpop_size - 1, len(swarm_pos_female) - 1)

    for dim in range(n_dims):
        scale = np.random.normal(0, 1)
        vmax_male[dim] = (swarm_pos_male[0][dim] - swarm_pos_male[last_male][dim]) * scale
        vmax_female[dim] = (swarm_pos_female[0][dim] - swarm_pos_female[last_female][dim]) * scale

In [None]:
def sort_population(population_pos, population_vel):
    """Order a group by ascending fitness (best individual first).

    Every row of ``population_pos`` is scored with find_fitness; the resulting
    ordering is applied to the fitness values, the positions and the
    velocities alike, so row i of each returned array describes the same
    individual.

    Returns
    -------
    (sorted_fitness, sorted_positions, sorted_velocities) as new arrays; the
    inputs themselves are not reordered.
    """
    scores = []
    for individual in population_pos:
        scores.append(find_fitness(individual))
    fitness_values = np.array(scores)

    # Indices that arrange the fitness values from lowest to highest.
    ranking = np.argsort(fitness_values)

    return fitness_values[ranking], population_pos[ranking], population_vel[ranking]