In [22]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

import warnings

# Silence every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# Load the dataset from a CSV file in the current working directory.
DATA_PATH = 'alzheimer.csv'
data = pd.read_csv(DATA_PATH)

In [23]:
# Turn the two string-valued columns into 0/1 codes.
# Series.map yields NaN for any value that is not a key of the mapping, so
# this cell must run exactly once on the freshly loaded frame: running it a
# second time would map the already-encoded 0/1 values to NaN.

# Diagnosis label: 1 = Nondemented, 0 = Demented
group_codes = {'Nondemented': 1, 'Demented': 0}
data['Group'] = data['Group'].map(group_codes)

# Gender: 1 = M, 0 = F
gender_codes = {'M': 1, 'F': 0}
data['Gender'] = data['Gender'].map(gender_codes)

In [24]:
# The target column must never be imputed. A NaN in 'Group' means the row has
# no usable class (for example a raw label that the encoding step did not
# map); filling it with the column median would silently assign such rows to
# a class. Drop those rows instead of inventing a label for them.
data = data.dropna(subset=['Group'])

# Fill the remaining gaps with each column's median. numeric_only=True keeps
# the median well-defined even if a non-numeric column is present, and
# rebinding `data` (rather than inplace=True) keeps the step explicit.
data = data.fillna(data.median(numeric_only=True))

# Sanity check: every column should now report zero missing values.
data.isnull().sum()

Group     0
Gender    0
Age       0
EDUC      0
SES       0
MMSE      0
CDR       0
eTIV      0
nWBV      0
ASF       0
dtype: int64

In [25]:
# Separate the target column from the predictor columns.
TARGET = 'Group'
labels = data[TARGET]
features = data.drop(columns=[TARGET])

In [26]:
class WOA_Disease:
    """Whale-Optimization-style (WOA) scaffold for feature selection.

    Each row of ``population`` is one candidate solution with one entry per
    column of the training frame; a non-zero entry marks that column as
    selected.

    NOTE(review): as written this class is a scaffold, not a working selector:

    * ``evaluate_fitness`` scores every candidate with *random* predictions,
      so the fitness does not depend on which features a candidate selects.
    * The population is initialised to all ones, and every update rule in
      ``update_population`` maps an all-ones population back onto itself
      (each rule adds a multiple of a zero difference), so ``best_solution``
      always selects every feature.

    Replace the placeholder fitness with a real model and diversify the
    initial population before relying on the selected features.

    Parameters
    ----------
    max_iter : int
        Number of population sweeps performed by ``fit``.
    population_size : int
        Number of candidate solutions (rows of ``population``).
    a : float
        Scale of the coefficient ``A = 2*a*r - a``. With the default 0.5,
        ``|A| <= 0.5``, so the ``|A| >= 1`` (random leader) branch is never
        taken; it needs ``|a| > 1`` to become reachable.
    b : float
        Multiplier inside the exponential of the spiral update ``exp(b*C)``.
    random_state : int or None
        Seed for a private ``numpy.random.RandomState``. ``None`` (default)
        keeps the previous behaviour of drawing from the global ``np.random``
        state.

    Attributes set by ``fit``
    -------------------------
    population, best_solution, best_features, best_fitness
    """

    def __init__(self, max_iter=200, population_size=20, a=0.5, b=1, random_state=None):
        self.max_iter = max_iter
        self.population_size = population_size
        self.a = a
        self.b = b
        self.random_state = random_state
        self._rng = None  # created lazily, reset at the start of every fit()

    def _get_rng(self):
        """Return the random source: a seeded RandomState or global np.random."""
        rng = getattr(self, "_rng", None)
        if rng is None:
            seed = getattr(self, "random_state", None)
            rng = np.random if seed is None else np.random.RandomState(seed)
            self._rng = rng
        return rng

    def fit(self, data, labels):
        """Run ``max_iter`` population sweeps and record the best candidate.

        Parameters
        ----------
        data : pandas.DataFrame
            Feature frame; ``data.columns`` is indexed with the boolean mask
            of the best solution.
        labels : array-like
            One label per row of ``data``.

        Sets ``best_solution``, ``best_features`` and ``best_fitness``.
        Returns ``None``.
        """
        self.data = data
        self.labels = labels
        # Fresh random source so that a seeded instance gives the same result
        # on every call to fit().
        self._rng = None

        # Initialize population with all features selected
        self.population = np.ones((self.population_size, data.shape[1]))

        for _ in range(self.max_iter):
            # Update population
            self.update_population()

        # Select best solution. The fitness is stochastic, so it is evaluated
        # exactly once: the reported best fitness and the chosen index must
        # come from the same evaluation.
        fitness = self.evaluate_fitness()
        best_index = int(np.argmin(fitness))
        self.best_fitness = fitness[best_index]
        # Copy so that later population updates cannot alter the stored result.
        self.best_solution = self.population[best_index].copy()
        self.best_features = self.data.columns[self.best_solution.astype(bool)].tolist()

    def update_population(self):
        """Apply one in-place update sweep to every candidate.

        For each candidate a single draw ``r`` feeds both coefficients
        ``A = 2*a*r - a`` and ``C = 2*r``; a second draw ``p`` picks the
        branch. ``population[0]`` is used as the leader in the spiral branch.
        """
        rng = self._get_rng()
        for i in range(self.population_size):
            r = rng.rand()
            # search
            A = 2 * self.a * r - self.a
            C = 2 * r
            p = rng.rand()
            if p < 0.5:
                # encircling the prey
                if np.abs(A) < 1:
                    self.population[i] = self.search_preys(A, C, i)
                else:
                    # move relative to a randomly chosen candidate
                    rand_leader_index = rng.randint(0, self.population_size)
                    rand_leader = self.population[rand_leader_index]
                    self.population[i] = rand_leader + A * (rand_leader - self.population[i])
            else:
                # bubble net attacking
                distance_to_leader = np.abs(self.population[i] - self.population[0])
                self.population[i] = (
                    distance_to_leader * np.exp(self.b * C) * np.cos(2 * np.pi * C)
                    + self.population[0]
                )

    def evaluate_fitness(self):
        """Return a list with one cost per candidate (lower is better).

        Placeholder: each candidate is scored by the summed absolute
        difference between *random* 0/1 predictions and ``self.labels``. The
        candidate's own feature mask is not used, so repeated calls return
        different values for the same population.
        """
        rng = self._get_rng()
        n_samples = len(self.labels)
        fitness = []
        for _ in self.population:
            # Random prediction for demonstration
            predicted_labels = rng.randint(0, 2, size=n_samples)
            fitness.append(np.abs(predicted_labels - self.labels).sum())
        return fitness

    def search_preys(self, A, C, i):
        """Return candidate ``i`` unchanged (``A`` and ``C`` are ignored)."""
        # Ensure that the solution remains unchanged
        return self.population[i]

In [27]:
# Build the optimizer with its default settings, then run it on the data
woa_disease = WOA_Disease()
woa_disease.fit(data=features, labels=labels)

In [28]:
# Pull the winning candidate vector and the column names it selects
best_solution, selected_features = (
    woa_disease.best_solution,
    woa_disease.best_features,
)

In [29]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Hold out 20% of the rows for testing; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    features[selected_features], labels, test_size=0.2, random_state=42
)

# Train the classification model using the selected features.
# The forest is seeded as well: with an unseeded RandomForestClassifier the
# reported accuracy changes from run to run even though the split is fixed.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict labels for the test set
predicted_labels = model.predict(X_test)

# Calculate accuracy (fraction of correct predictions, in [0, 1])
accuracy = accuracy_score(y_test, predicted_labels)


In [30]:
# Report the selection mask, the chosen columns and the accuracy in percent
report = (
    ("Best solution:", best_solution),
    ("Selected features:", selected_features),
    ("Accuracy:", accuracy*100),
)
for caption, value in report:
    print(caption, value)

Best solution: [1. 1. 1. 1. 1. 1. 1. 1. 1.]
Selected features: ['Gender', 'Age', 'EDUC', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV', 'ASF']
Accuracy: 94.66666666666667
