### Importing necessary libraries

In [187]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### PSOLDA class

In [188]:
def pso_lda(X, y, n_particles=50, n_iterations=200, w=0.7, c1=1.4, c2=1.4):
    n_features = X.shape[1]
    lb = np.ones(n_features) * -10
    ub = np.ones(n_features) * 10
    # Initialize particles
    particles = np.random.uniform(low=lb, high=ub, size=(n_particles, n_features))
    velocities = np.zeros((n_particles, n_features))
    best_particle_positions = particles.copy()
    best_particle_scores = np.zeros(n_particles)
    global_best_particle_position = np.zeros(n_features)
    global_best_particle_score = -np.inf

    for i in range(n_iterations):
        for j in range(n_particles):
            # Evaluate score for current particle
            mask = particles[j] > 0
            selected_features = np.where(mask)[0]
            if len(selected_features) == 0:
                score = -np.inf
            else:
                X_selected = X[:, selected_features]
                clf = LinearDiscriminantAnalysis()
                clf.fit(X_selected, y)
                score = clf.score(X_selected, y)

            # Update best particle positions and scores
            if score > best_particle_scores[j]:
                best_particle_scores[j] = score
                best_particle_positions[j] = particles[j]

            if score > global_best_particle_score:
                global_best_particle_score = score
                global_best_particle_position = particles[j]

            # Update velocities and particle positions
            r1, r2 = np.random.rand(), np.random.rand()
            velocities[j] = (w * velocities[j] +
                             c1 * r1 * (best_particle_positions[j] - particles[j]) +
                             c2 * r2 * (global_best_particle_position - particles[j]))
            particles[j] = particles[j] + velocities[j]
            particles[j] = np.clip(particles[j], lb, ub)

    # Select best features
    mask = global_best_particle_position > 0
    selected_features = np.where(mask)[0]

    if len(selected_features) == 0:
        raise ValueError("PSO-LDA failed to select any features")

    X_selected = X[:, selected_features]

    # Train LDA on selected features
    clf = LinearDiscriminantAnalysis()
    clf.fit(X_selected, y)

    return clf, selected_features


### Reading data from csv and storing in dataframe

In [189]:
# Generate a random classification dataset
df = pd.read_csv('../datasets/sonar_csv.csv')

#### X, Y split


In [190]:
x_df = df.iloc[:, :-1]
y_df = df.iloc[:, -1:]

#### Mapping y_df to integers

In [191]:
mapping = dict()
cnt = 0
for i in y_df['Class'].unique():
    if mapping.get(i) == None:
        mapping[i] = cnt
        cnt += 1
y_df['Class'] = y_df['Class'].map(mapping)

#### Converting X, y to numpy array 

In [192]:
x_df = x_df.to_numpy()
y_df = y_df.to_numpy().reshape((y_df.shape[0], ))
x_df.shape, y_df.shape

((208, 60), (208,))

#### Splitting in training and testing 

In [193]:
X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=15)

### Run PSO-LDA feature selection on training set

In [194]:
clf, selected_features = pso_lda(X_train, y_train)


#### Test classifier on test set using only selected features from dataset

In [195]:

X_test_selected = X_test[:, selected_features]
test_score = clf.score(X_test_selected, y_test)


### Initialize LDA model and fit to data and make prediction on test data

In [196]:
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)
y_pred = lda.predict(X_test)

### Evaluating accuracy of predictions with and without PSOLDA

In [197]:
accuracy = lda.score(X_test, y_test)
print("Test score without PSOLDA:", accuracy)

print("Selected features:", list(selected_features))
print("Test score:", test_score)

Test score without PSOLDA: 0.6904761904761905
Selected features: [1, 3, 8, 11, 15, 16, 18, 22, 23, 24, 25, 27, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 48, 49, 50, 52, 54, 57, 58, 59]
Test score: 0.7857142857142857


## As we can see from above example PSOLDA performs much better than traditional LDA