<a href="https://colab.research.google.com/github/hamidzangiabadi/sailfish-optimization-algorithm/blob/main/Method.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ucimlrepo
!pip install mealpy
!pip install scikit-learn

# import datasets

In [1]:
from ucimlrepo import fetch_ucirepo

# Fetch the datasets used in the experiments via `fetch_ucirepo`, one call per
# dataset id. Only Wine (id=109) is active; the remaining fetches are kept
# commented out so they can be re-enabled together with the matching entry in
# the `Datasets` list defined in the next cell.
wine = fetch_ucirepo(id=109)
# print("wine loaded!")
# breast_cancer = fetch_ucirepo(id=14)
# print("breast_cancer loaded!")
# zoo = fetch_ucirepo(id=111)
# print("zoo loaded!")
# automobile = fetch_ucirepo(id=10)
# print("automobile loaded")
# lymphography = fetch_ucirepo(id=63)
# print("lymphography loaded")
# student_performance = fetch_ucirepo(id=320)
# print("student_performance loaded")
# ionosphere = fetch_ucirepo(id=52)
# print("ionosphere loaded")
# credit_approval = fetch_ucirepo(id=27)
# print("credit_approval loaded")
# hepatitis = fetch_ucirepo(id=46)
# print("hepatitis loaded")



In [2]:
# Registry of datasets to evaluate: a display name plus the fetched dataset
# object. An entry may only be uncommented once the corresponding fetch call
# in the previous cell has been enabled as well.
Datasets = [
    {"name": "Wine", "dataset": wine},
    # {"name": "Zoo", "dataset": zoo},
    # {"name": "Breast Cancer", "dataset": breast_cancer},
    # {"name": "Ionosphere", "dataset": ionosphere},
    # {"name": "Credit Approval", "dataset": credit_approval},
    # {"name": "Hepatitis", "dataset": hepatitis},
    # {"name": "Automobile", "dataset": automobile},
    # {"name": "Lymphography", "dataset": lymphography},
    # {"name": "Student Performance", "dataset": student_performance},
]

## modify datasets and required information





Preprocessing: convert every non-numeric (object-dtype) column of the features and targets into integer codes using a label encoder.

In [3]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd



def _encode_object_columns(frame):
    """Return a copy of ``frame`` with object-dtype columns label-encoded.

    Each object column ``c`` is replaced by an integer column ``c_encoded``
    (appended after the remaining columns); non-object columns are kept as-is.
    The input frame is never modified, which avoids pandas'
    SettingWithCopyWarning and keeps the cell safe to re-run.

    Parameters
    ----------
    frame : pandas.DataFrame
        Feature or target table to encode.

    Returns
    -------
    pandas.DataFrame
        New frame without the original object columns.
    """
    object_columns = frame.select_dtypes(include=['object']).columns
    encoded = frame.copy()
    for column in object_columns:
        # A fresh encoder per column keeps the fitted classes independent.
        # NOTE(review): columns mixing strings with missing values may need
        # imputation before encoding -- confirm for the commented-out datasets.
        encoded[column + '_encoded'] = LabelEncoder().fit_transform(encoded[column])
    return encoded.drop(columns=object_columns)


# Apply the same encoding to both the feature and the target table of every
# registered dataset, storing the result back on the dataset object.
for entry in Datasets:
    data = entry["dataset"].data
    data.features = _encode_object_columns(data.features)
    data.targets = _encode_object_columns(data.targets)


## functions

In [4]:
import csv

def binary_conversion(X, dim):
    """Threshold the first ``dim`` entries of ``X`` into a 0/1 vector.

    An entry strictly greater than 0.5 maps to 1, anything else maps to 0.

    Parameters
    ----------
    X : indexable sequence of numbers
        Continuous position vector; must support ``X[i]`` for ``i < dim``.
    dim : int
        Number of leading entries to convert.

    Returns
    -------
    numpy.ndarray
        Float array of length ``dim`` containing only 0.0 and 1.0.
    """
    bits = [1.0 if X[idx] > 0.5 else 0.0 for idx in range(dim)]
    return np.array(bits, dtype=float)


def save_results(g_best, method_name, num_features,datasetName):
    """Print and persist the outcome of one optimizer run.

    The best solution is thresholded to a binary feature mask, its accuracy is
    evaluated once with ``checkAccuracy``, and a single row is appended to
    ``<datasetName>_results.csv`` (the header is written only when the file
    does not exist yet).

    Parameters
    ----------
    g_best : object
        Best agent returned by the optimizer; ``g_best.solution`` and
        ``g_best.target.fitness`` are read.
    method_name : str
        Label stored in the ``Method`` column.
    num_features : int
        Number of leading entries of the solution that encode features.
    datasetName : str
        Used as the prefix of the results file name.
    """
    import os

    import pandas as pd

    selectedFeaturesSubset = binary_conversion(g_best.solution,num_features)
    featuresCount = int(sum(selectedFeaturesSubset))
    bestFitness = g_best.target.fitness

    # checkAccuracy draws a new random train/test split on every call, so it is
    # evaluated exactly once: the printed and the stored accuracy must agree.
    accuracy = checkAccuracy(selectedFeaturesSubset)

    print(f"Solution: {str(selectedFeaturesSubset)}, Fitness: {g_best.target.fitness}")
    print(f"No of features: {str(featuresCount)}")
    print(f"Reached accuracy: {str(accuracy)}")

    res = {
        "Method": method_name,
        "NumberOfFeatures": str(featuresCount),
        "Accuracy": accuracy,
        "Fitness": bestFitness
    }

    # Append one row per run; only a newly created file gets the header line.
    results_path = f"{datasetName}_results.csv"
    pd.DataFrame([res]).to_csv(
        results_path,
        mode='a',
        index=False,
        header=not os.path.exists(results_path),
    )
    

## method

In [5]:
import numpy as np
from mealpy import FloatVar, GA,SFO,WOA,EHO,GWO,DO,MVO,MFO
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing





def checkAccuracy(features):
    """Hold-out accuracy of a linear SVM trained on the selected features.

    Reads the module-level ``X`` (feature table), ``y`` (targets) and
    ``num_features`` set by the experiment loop.

    Parameters
    ----------
    features : indexable sequence of numbers
        Position vector; entries greater than 0.5 mark a selected feature.

    Returns
    -------
    float or int
        Accuracy on a 30 % test split, or 0 when no feature is selected.
        No ``random_state`` is passed to ``train_test_split``, so repeated
        calls with the same input can return different values.
    """
    selected_features = binary_conversion(features,num_features)
    if np.sum(selected_features == 1) == 0:
        return 0

    targets = np.array(y).ravel()
    subset = X.iloc[:, selected_features == 1]
    train_x, test_x, train_y, test_y = train_test_split(subset, targets, test_size=0.3)

    svm = SVC(kernel='linear')
    svm.fit(train_x, train_y)

    # Score the predictions made on the held-out part of the split.
    return accuracy_score(test_y, svm.predict(test_x))

def objective_function(features):
    """Fitness of a candidate feature subset (to be maximised).

    The score rewards hold-out accuracy and penalises the fraction of selected
    features: ``0.6 * accuracy - 0.4 * (num_selected / len(features))``.
    Reads the module-level ``num_features`` set by the experiment loop.

    Parameters
    ----------
    features : indexable sequence of numbers
        Position vector; entries greater than 0.5 mark a selected feature.

    Returns
    -------
    float or int
        The weighted score, or 0 when no feature or every feature is selected.
    """
    max_feat = len(features)
    num_feat = np.sum(binary_conversion(features,num_features) == 1)

    # Degenerate subsets (nothing selected / everything selected) are rejected.
    # NOTE(review): the weighted score below can be negative, so 0 is not a
    # strict lower bound for valid subsets -- confirm this penalty is intended.
    if num_feat == 0 or num_feat == max_feat:
        return 0

    alpha = 0.6  # weight of the classification accuracy
    beta = 0.4   # weight of the selected-feature ratio penalty

    # Same split/train/score pipeline as checkAccuracy; reuse it instead of
    # keeping a second copy of that code here.
    accuracy = checkAccuracy(features)

    cost = alpha * accuracy - beta * (num_feat / max_feat)
    return cost



# Optimizers to compare, in execution order. Each entry is (label, factory);
# the factory builds a new model right before it is solved so every dataset
# starts from an untouched optimizer instance.
OPTIMIZER_FACTORIES = [
    ("SFO", lambda: SFO.OriginalSFO(epoch=20, pop_size=30, pp=0.2, AP=4, epsilon=0.0001)),
    ("GA", lambda: GA.BaseGA(epoch=20, pop_size=20, pc=0.9, pm=0.05)),
    ("EHO", lambda: EHO.OriginalEHO(epoch=20, pop_size=20, alpha=0.5, beta=0.5, n_clans=2)),
    ("GWO", lambda: GWO.OriginalGWO(epoch=20, pop_size=20)),
    ("WOA", lambda: WOA.OriginalWOA(epoch=20, pop_size=20)),
    ("DO", lambda: DO.OriginalDO(epoch=20, pop_size=20, pr=0.03)),
    ("MFO", lambda: MFO.OriginalMFO(epoch=20, pop_size=20)),
]

for dataset in Datasets:
    # X, y and num_features are module-level on purpose: checkAccuracy and
    # objective_function read them as globals.
    X = dataset["dataset"].data.features
    y = dataset["dataset"].data.targets
    num_features = X.shape[1]

    # One continuous variable in [0, 1] per feature; values above 0.5 select it.
    lb = np.zeros(num_features)
    ub = np.ones(num_features)

    problem_dict = {
        "bounds": FloatVar(lb=lb, ub=ub),
        "obj_func": objective_function,
        "minmax": "max",
    }

    for method_name, build_optimizer in OPTIMIZER_FACTORIES:
        g_best = build_optimizer().solve(problem_dict)
        save_results(g_best, method_name, num_features, dataset["name"])


2024/05/18 01:08:33 PM, INFO, mealpy.swarm_based.SFO.OriginalSFO: Solving single objective optimization problem.
2024/05/18 01:08:51 PM, INFO, mealpy.swarm_based.SFO.OriginalSFO: >>>Problem: P, Epoch: 1, Current best: 0.4965811965811966, Global best: 0.4965811965811966, Runtime: 2.45685 seconds
2024/05/18 01:08:55 PM, INFO, mealpy.swarm_based.SFO.OriginalSFO: >>>Problem: P, Epoch: 2, Current best: 0.4965811965811966, Global best: 0.4965811965811966, Runtime: 3.99509 seconds


KeyboardInterrupt: 

# create plots

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Methods shown in the chart and their bar colours. Plain lists/dicts are used
# so the mealpy modules named SFO and GA are not overwritten by this cell.
METHODS_TO_PLOT = ["SFO", "GA"]
BAR_COLORS = {"SFO": "r", "GA": "g"}
BAR_WIDTH = 0.25

dataset_names = []
accuracy_by_method = {method: [] for method in METHODS_TO_PLOT}

# Accuracies are read from the "<name>_results.csv" files written by
# save_results. Rows are appended on every run, so the last row of each method
# is taken as its most recent result.
for entry in Datasets:
    results = pd.read_csv(f"{entry['name']}_results.csv")
    latest_accuracy = results.groupby("Method")["Accuracy"].last()
    dataset_names.append(entry["name"])
    for method in METHODS_TO_PLOT:
        # A method without any stored run is plotted as a gap (NaN).
        accuracy_by_method[method].append(float(latest_accuracy.get(method, np.nan)) * 100)

fig, ax = plt.subplots(figsize=(12, 8))

# One group of bars per dataset, one bar per method inside the group.
group_positions = np.arange(len(dataset_names))
for offset, method in enumerate(METHODS_TO_PLOT):
    ax.bar(
        group_positions + offset * BAR_WIDTH,
        accuracy_by_method[method],
        color=BAR_COLORS[method],
        width=BAR_WIDTH,
        edgecolor='grey',
        label=method,
    )

ax.set_title('Classification accuracy per dataset', fontweight='bold', fontsize=15)
ax.set_xlabel('Dataset', fontweight='bold', fontsize=15)
ax.set_ylabel('Accuracy (%)', fontweight='bold', fontsize=15)

# Centre each tick under its group of bars.
ax.set_xticks(group_positions + BAR_WIDTH * (len(METHODS_TO_PLOT) - 1) / 2)
ax.set_xticklabels(dataset_names)

ax.legend()
plt.show()