In [127]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the pre-split training and test tables from the working directory
train_df = pd.read_csv('BCD_Train.csv')
test_df = pd.read_csv('BCD_Test.csv')

# Separate the 'Diagnosis' target column from the remaining feature columns
y_train = train_df['Diagnosis']
X_train = train_df.drop(columns='Diagnosis')

y_test = test_df['Diagnosis']
X_test = test_df.drop(columns='Diagnosis')

def employedBee(X_train, y_train, X_test, y_test, selected_features, n_employed):
    """Employed-bee phase: perturb the active features and keep the best candidate.

    Runs ``n_employed`` trials. In each trial ``n_employed`` indices are drawn
    (with replacement) from the positions where ``selected_features == 1`` and
    each drawn position is re-drawn as 1 or 0 with equal probability. Positions
    that are 0 in ``selected_features`` are never switched on. Each candidate is
    scored by fitting a 3-nearest-neighbour classifier on ``X_train`` and
    measuring accuracy on ``X_test``.

    Parameters
    ----------
    X_train, X_test : objects supporting ``.iloc[:, bool_mask]`` column selection
    y_train, y_test : labels matching the rows of ``X_train`` / ``X_test``
    selected_features : numpy array of 0/1 flags, one per column
    n_employed : int, number of trials and number of indices drawn per trial

    Returns
    -------
    (best_features, best_accuracy)
        A copy of the best-scoring mask and its accuracy. If no candidate was
        evaluated (no active feature, ``n_employed == 0``, or every candidate
        was empty) the result is a copy of ``selected_features`` and ``0``.
    """
    best_features = selected_features.copy()
    best_accuracy = 0

    active_idx = np.flatnonzero(selected_features == 1)
    if active_idx.size == 0:
        # np.random.choice cannot sample from an empty array; there is
        # nothing to perturb, so report the input unchanged.
        return best_features, best_accuracy

    for _ in range(n_employed):
        picked = np.random.choice(active_idx, size=n_employed, replace=True)

        candidate_features = selected_features.copy()
        for idx in picked:
            candidate_features[idx] = 1 if np.random.rand() < 0.5 else 0

        column_mask = candidate_features == 1
        if not column_mask.any():
            # Every active feature was switched off; a classifier cannot be
            # fitted on zero columns, so discard this candidate.
            continue

        # Score the candidate subset with 3-NN
        knn_model = KNeighborsClassifier(n_neighbors=3)
        knn_model.fit(X_train.iloc[:, column_mask], y_train)
        y_pred = knn_model.predict(X_test.iloc[:, column_mask])
        accuracy = accuracy_score(y_test, y_pred)

        # Keep the best candidate seen so far
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_features = candidate_features.copy()

    return best_features, best_accuracy


def onlookerBee(X_train, y_train, X_test, y_test, employed_features, n_onlookers):
    """Onlooker-bee phase: try random subsets of the employed-bee features.

    Runs ``n_onlookers`` trials. In each trial ``n_onlookers`` column indices
    are drawn (with replacement) from the positions where
    ``employed_features == 1``; the candidate mask has 1 at exactly the drawn
    positions. Each candidate is scored by fitting a 3-nearest-neighbour
    classifier on ``X_train`` and measuring accuracy on ``X_test``.

    The indices are sampled from the positions of the active flags. Sampling
    the flag values themselves (0/1) and using them as indices would only ever
    select columns 0 and 1.

    Parameters
    ----------
    X_train, X_test : objects supporting ``.shape`` and ``.iloc[:, bool_mask]``
    y_train, y_test : labels matching the rows of ``X_train`` / ``X_test``
    employed_features : numpy array of 0/1 flags, one per column
    n_onlookers : int, number of trials and number of indices drawn per trial

    Returns
    -------
    (best_features, best_accuracy)
        The best-scoring mask (integer 0/1 array) and its accuracy. If no
        candidate was evaluated the result is a copy of ``employed_features``
        and ``0``.
    """
    best_features = employed_features.copy()
    best_accuracy = 0

    active_idx = np.flatnonzero(employed_features == 1)
    if active_idx.size == 0:
        # No active feature to sample from; report the input unchanged.
        return best_features, best_accuracy

    for _ in range(n_onlookers):
        picked = np.random.choice(active_idx, size=n_onlookers, replace=True)

        # Integer mask so the result can be indexed and compared like the
        # masks produced elsewhere in this file.
        candidate_features = np.zeros(X_train.shape[1], dtype=int)
        candidate_features[picked] = 1
        column_mask = candidate_features == 1

        # Score the candidate subset with 3-NN
        knn_model = KNeighborsClassifier(n_neighbors=3)
        knn_model.fit(X_train.iloc[:, column_mask], y_train)
        y_pred = knn_model.predict(X_test.iloc[:, column_mask])
        accuracy = accuracy_score(y_test, y_pred)

        # Keep the best candidate seen so far
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_features = candidate_features.copy()

    return best_features, best_accuracy

    
# main function
def feature_selection_ABC(X_train, y_train, X_test, y_test, n_iterations=100, n_employed=10, n_onlookers = 10, limit=5, mutation_rate=0.1):
    """Search for a feature subset with an artificial-bee-colony style loop.

    Each iteration:
      1. runs ``employedBee`` on the current solution, then ``onlookerBee`` on
         the employed-bee result, and keeps whichever reported the higher
         accuracy;
      2. flips each flag of that mask independently with probability
         ``mutation_rate`` and scores the mutated mask with a 3-NN classifier;
      3. updates the global best from both the phase winner and the mutated
         mask;
      4. scout step: after ``limit`` consecutive iterations without an
         improvement, the next iteration starts from a fresh random mask
         instead of the global best.

    All accuracies are measured on ``X_test`` / ``y_test``, so the returned
    accuracy was also the selection criterion and is not an independent
    estimate.

    Parameters
    ----------
    X_train, X_test : objects supporting ``.shape`` and ``.iloc[:, bool_mask]``
    y_train, y_test : labels matching the rows of ``X_train`` / ``X_test``
    n_iterations : int, number of search iterations
    n_employed, n_onlookers : int, forwarded to ``employedBee`` / ``onlookerBee``
    limit : int, stagnation length that triggers the scout restart
    mutation_rate : float in [0, 1], per-feature flip probability in step 2

    Returns
    -------
    (best_features, best_accuracy)
        Integer 0/1 mask of the best subset found and its accuracy. With
        ``n_iterations == 0`` this is the random initial mask and ``0``.
    """
    num_features = X_train.shape[1]

    def random_mask():
        # Random 0/1 mask with at least one active feature (when any exist),
        # so downstream classifiers never receive zero columns.
        mask = np.random.choice([0, 1], size=num_features)
        if num_features and not mask.any():
            mask[np.random.randint(num_features)] = 1
        return mask

    def knn_accuracy(mask):
        # Fit 3-NN on the flagged columns and score it on the test split.
        columns = mask == 1
        knn_model = KNeighborsClassifier(n_neighbors=3)
        knn_model.fit(X_train.iloc[:, columns], y_train)
        return accuracy_score(y_test, knn_model.predict(X_test.iloc[:, columns]))

    # choose random features
    current_features = random_mask()

    # initial values
    best_features = current_features.copy()
    best_accuracy = 0
    stalled = 0  # consecutive iterations without a new global best

    for iteration in range(n_iterations):
        employed_features, employed_accuracy = employedBee(X_train, y_train, X_test, y_test, current_features, n_employed)
        onlooker_features, onlooker_accuracy = onlookerBee(X_train, y_train, X_test, y_test, employed_features, n_onlookers)

        if employed_accuracy > onlooker_accuracy:
            phase_features, phase_accuracy = employed_features, employed_accuracy
        else:
            phase_features, phase_accuracy = onlooker_features, onlooker_accuracy
        # Normalise to an integer mask; a float mask cannot be flipped or
        # indexed reliably below.
        phase_features = np.asarray(phase_features).astype(int)

        improved = False
        if phase_accuracy > best_accuracy:
            best_accuracy = phase_accuracy
            best_features = phase_features.copy()
            improved = True

        # Random perturbation: flip individual flags by position. Iterating
        # over the mask's values instead of its indices would only ever touch
        # columns 0 and 1.
        selected_features = phase_features.copy()
        flips = np.random.rand(num_features) < mutation_rate
        selected_features[flips] = 1 - selected_features[flips]
        if not selected_features.any():
            selected_features[np.random.randint(num_features)] = 1

        accuracy = knn_accuracy(selected_features)

        # update best accuracy
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_features = selected_features.copy()
            improved = True

        # scout bee: abandon a stagnant source after `limit` failed iterations
        if improved:
            stalled = 0
            current_features = best_features.copy()
        else:
            stalled += 1
            if stalled >= limit:
                current_features = random_mask()
                stalled = 0
            else:
                current_features = best_features.copy()

    print("best features", best_features)
    return best_features, best_accuracy

# Run the ABC search; the result is a (feature_mask, accuracy) tuple
selected_features_ABC = feature_selection_ABC(X_train, y_train, X_test, y_test)

# Fit the final 3-NN classifier on the columns flagged with 1 in the mask
knn_model_final = KNeighborsClassifier(n_neighbors=3).fit(
    X_train.iloc[:, selected_features_ABC[0] == 1],
    y_train,
)

# Predict on the test split using the same column subset and report accuracy
y_pred_final = knn_model_final.predict(
    X_test.iloc[:, selected_features_ABC[0] == 1]
)
accuracy_final = accuracy_score(y_test, y_pred_final)
print("Final accuracy:", accuracy_final)

best features [1 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0]
Final accuracy: 0.9577464788732394


In [134]:
import numpy as np
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

class Particle:
    """A single swarm member holding a boolean position vector.

    ``__init__`` sets four attributes:
      * ``position``      - boolean array of length ``dimension``, True where a
                            uniform draw exceeded 0.5
      * ``velocity``      - float array of length ``dimension``, uniform draws
                            shifted by -0.5
      * ``best_position`` - independent copy of the initial ``position``
      * ``best_score``    - negative infinity until a score is recorded
    """

    def __init__(self, dimension):
        # Draw order (position, then velocity) is kept so seeded runs match.
        position_draw = np.random.rand(dimension)
        self.position = position_draw > 0.5
        velocity_draw = np.random.rand(dimension)
        self.velocity = velocity_draw - 0.5
        self.best_position = self.position.copy()
        self.best_score = float("-inf")

class PSO:
    """Binary particle swarm optimiser over boolean position vectors.

    Parameters
    ----------
    n_particles : int, number of ``Particle`` objects to create
    dimension : int, length of each position vector
    fitness_func : callable taking a boolean position array and returning a
        score; larger scores are treated as better

    After ``run`` the best position found is in ``global_best_position``
    (boolean array) and its score in ``global_best_score``.
    """

    def __init__(self, n_particles, dimension, fitness_func):
        self.particles = [Particle(dimension) for _ in range(n_particles)]
        self.global_best_position = np.zeros(dimension, dtype=bool)
        self.global_best_score = -np.inf
        self.fitness_func = fitness_func

    def update_velocity(self, particle):
        """Update ``particle.velocity`` in place and clip it to [-1, 1]."""
        w = 0.5  # inertia weight
        c1 = c2 = 1.5  # cognitive and social parameters
        r1, r2 = np.random.rand(2)  # cognitive and social random numbers

        # Use the signed difference (best - current): +1 pulls a bit towards 1,
        # -1 pulls it towards 0. An XOR / `!=` term is never negative, so it
        # would push a bit towards 1 even when the best position has 0 there.
        # Boolean arrays are cast to float because NumPy does not support
        # subtraction on booleans.
        position = particle.position.astype(float)
        cognitive_velocity = c1 * r1 * (particle.best_position.astype(float) - position)
        social_velocity = c2 * r2 * (self.global_best_position.astype(float) - position)
        inertia_velocity = w * particle.velocity

        new_velocity = inertia_velocity + cognitive_velocity + social_velocity
        particle.velocity = np.clip(new_velocity, -1, 1)

    def update_position(self, particle):
        """Resample each bit: True with probability sigmoid(velocity)."""
        probability = 1 / (1 + np.exp(-particle.velocity))
        # The comparison already yields a boolean array.
        particle.position = np.random.rand(particle.position.size) < probability

    def evaluate(self, particle):
        """Score the particle's position and update personal/global bests."""
        score = self.fitness_func(particle.position)
        if score > particle.best_score:
            particle.best_score = score
            particle.best_position = np.copy(particle.position)
        if score > self.global_best_score:
            self.global_best_score = score
            self.global_best_position = np.copy(particle.position)

    def run(self, n_iterations):
        """Run ``n_iterations`` rounds of velocity/position/evaluate updates.

        Particles that have never been scored are evaluated first, so the
        first velocity update is attracted to real personal/global bests
        rather than to the all-False placeholder set in ``__init__``.
        """
        for particle in self.particles:
            if particle.best_score == -np.inf:
                self.evaluate(particle)

        for _ in range(n_iterations):
            for particle in self.particles:
                self.update_velocity(particle)
                self.update_position(particle)
                self.evaluate(particle)

# Example usage:
# Define a fitness function for feature selection
def fitness_func(position):
    """Score a feature mask by random-forest accuracy on the test split.

    ``position`` is converted to a boolean column mask. An all-False mask
    scores 0 without training anything. Otherwise a default
    ``RandomForestClassifier`` is fitted on the masked columns of the
    module-level ``X_train`` / ``y_train`` and the accuracy of its
    predictions on the masked ``X_test`` against ``y_test`` is returned.
    """
    column_mask = position.astype(bool)

    # Guard: no feature selected, nothing to train on
    if not column_mask.any():
        return 0

    forest = RandomForestClassifier()
    forest.fit(X_train[:, column_mask], y_train)
    return accuracy_score(y_test, forest.predict(X_test[:, column_mask]))

# Load the datasets
train_df = pd.read_csv('BCD_Train.csv')
test_df = pd.read_csv('BCD_Test.csv')

# Features and labels for classification, as NumPy arrays
# (fitness_func selects columns with X[:, mask])
X_train = train_df.drop('Diagnosis', axis=1).values
y_train = train_df['Diagnosis'].values

X_test = test_df.drop('Diagnosis', axis=1).values
y_test = test_df['Diagnosis'].values


# Run PSO for feature selection
pso = PSO(n_particles=30, dimension=X_train.shape[1], fitness_func=fitness_func)
pso.run(n_iterations=20)


# Get the best solution. It has to be assigned before the column selection
# below; otherwise a fresh kernel raises NameError and a re-run silently
# reuses the mask from the previous execution.
best_features = pso.global_best_position.astype(int)

# Restrict the training and test sets to the selected features
X_train_selected = X_train[:, best_features.astype(bool)]
X_test_selected = X_test[:, best_features.astype(bool)]

# Train the classifier
clf = RandomForestClassifier()
clf.fit(X_train_selected, y_train)

# Predict on the test set
predictions = clf.predict(X_test_selected)

# Compute the accuracy
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

print("Selected Features:", best_features)


ay 30
Accuracy: 0.9788732394366197
Selected Features: [1 1 1 0 0 0 1 0 0 1 1 0 0 1 1 1 1 0 0 1 1 1 1 1 1 0 0 1 0 0]
